diff --git a/swh/web/api/apidoc.py b/swh/web/api/apidoc.py
index 1b9154aab..1d1fbc5e6 100644
--- a/swh/web/api/apidoc.py
+++ b/swh/web/api/apidoc.py
@@ -1,356 +1,356 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import functools
import os
import re
from functools import wraps
from rest_framework.decorators import api_view
from swh.web.common.utils import parse_rst
from swh.web.api.apiurls import APIUrls
from swh.web.api.apiresponse import make_api_response, error_response
class _HTTPDomainDocVisitor(docutils.nodes.NodeVisitor):
"""
docutils visitor for walking on a parsed rst document containing sphinx
httpdomain roles. Its purpose is to extract relevant info regarding swh
api endpoints (for instance url arguments) from their docstring written
using sphinx httpdomain.
"""
# httpdomain roles we want to parse (based on sphinxcontrib.httpdomain 1.6)
parameter_roles = ('param', 'parameter', 'arg', 'argument')
response_json_object_roles = ('resjsonobj', 'resjson', '>jsonobj', '>json')
response_json_array_roles = ('resjsonarr', '>jsonarr')
query_parameter_roles = ('queryparameter', 'queryparam', 'qparam', 'query')
request_header_roles = ('<header', 'header', 'reqheader', 'requestheader')
response_header_roles = ('resheader', 'responseheader')
status_code_roles = ('statuscode', 'status', 'code')
def __init__(self, document, urls, data):
super().__init__(document)
self.urls = urls
self.url_idx = 0
self.data = data
self.args_set = set()
self.params_set = set()
self.returns_set = set()
self.status_codes_set = set()
self.reqheaders_set = set()
self.resheaders_set = set()
self.field_list_visited = False
def process_paragraph(self, par):
"""
Process extracted paragraph text before display.
Cleanup document model markups and transform the
paragraph into a valid raw rst string (as the apidoc
documentation transform rst to html when rendering).
"""
par = par.replace('\n', ' ')
# keep emphasized and strong text
par = par.replace('<em>', '*')
par = par.replace('</em>', '*')
par = par.replace('<strong>', '**')
par = par.replace('</strong>', '**')
# remove parsed document markups
par = re.sub('<[^<]+?>', '', par)
# api urls cleanup to generate valid links afterwards
par = re.sub('\(\w+\)', '', par) # noqa
par = re.sub('\[.*\]', '', par) # noqa
par = par.replace('//', '/')
# transform references to api endpoints into valid rst links
par = re.sub(':http:get:`(.*)`', r'`<\1>`_', par)
# transform references to some elements into bold text
par = re.sub(':http:header:`(.*)`', r'**\1**', par)
par = re.sub(':func:`(.*)`', r'**\1**', par)
return par
def visit_field_list(self, node):
"""
Visit parsed rst field lists to extract relevant info
regarding api endpoint.
"""
self.field_list_visited = True
for child in node.traverse():
# get the parsed field name
if isinstance(child, docutils.nodes.field_name):
field_name = child.astext()
# parse field text
elif isinstance(child, docutils.nodes.paragraph):
text = self.process_paragraph(str(child))
field_data = field_name.split(' ')
# Parameters
if field_data[0] in self.parameter_roles:
if field_data[2] not in self.args_set:
self.data['args'].append({'name': field_data[2],
'type': field_data[1],
'doc': text})
self.args_set.add(field_data[2])
# Query Parameters
if field_data[0] in self.query_parameter_roles:
if field_data[2] not in self.params_set:
self.data['params'].append({'name': field_data[2],
'type': field_data[1],
'doc': text})
self.params_set.add(field_data[2])
# Response type
if field_data[0] in self.response_json_array_roles or \
field_data[0] in self.response_json_object_roles:
# array
if field_data[0] in self.response_json_array_roles:
self.data['return_type'] = 'array'
# object
else:
self.data['return_type'] = 'object'
# returned object field
if field_data[2] not in self.returns_set:
self.data['returns'].append({'name': field_data[2],
'type': field_data[1],
'doc': text})
self.returns_set.add(field_data[2])
# Status Codes
if field_data[0] in self.status_code_roles:
if field_data[1] not in self.status_codes_set:
self.data['status_codes'].append({'code': field_data[1], # noqa
'doc': text})
self.status_codes_set.add(field_data[1])
# Request Headers
if field_data[0] in self.request_header_roles:
if field_data[1] not in self.reqheaders_set:
self.data['reqheaders'].append({'name': field_data[1],
'doc': text})
self.reqheaders_set.add(field_data[1])
# Response Headers
if field_data[0] in self.response_header_roles:
if field_data[1] not in self.resheaders_set:
resheader = {'name': field_data[1],
'doc': text}
self.data['resheaders'].append(resheader)
self.resheaders_set.add(field_data[1])
if resheader['name'] == 'Content-Type' and \
resheader['doc'] == 'application/octet-stream':
self.data['return_type'] = 'octet stream'
def visit_paragraph(self, node):
"""
Visit relevant paragraphs to parse
"""
# only parsed top level paragraphs
if isinstance(node.parent, docutils.nodes.block_quote):
text = self.process_paragraph(str(node))
# endpoint description
if not text.startswith('**') and self.data['description'] != text:
self.data['description'] += '\n\n' if self.data['description'] else '' # noqa
self.data['description'] += text
# http methods
elif text.startswith('**Allowed HTTP Methods:**'):
text = text.replace('**Allowed HTTP Methods:**', '')
http_methods = text.strip().split(',')
http_methods = [m[m.find('`')+1:-1].upper()
for m in http_methods]
self.data['urls'].append({'rule': self.urls[self.url_idx],
'methods': http_methods})
self.url_idx += 1
def visit_literal_block(self, node):
"""
Visit literal blocks
"""
text = node.astext()
# literal block in endpoint description
if not self.field_list_visited:
self.data['description'] += ':\n\n\t%s' % text
# extract example url
if ':swh_web_api:' in text:
self.data['examples'].append(
'/api/1/' + re.sub('.*`(.*)`.*', r'\1', text))
def visit_bullet_list(self, node):
# bullet list in endpoint description
if not self.field_list_visited:
self.data['description'] += '\n\n'
for child in node.traverse():
# process list item
if isinstance(child, docutils.nodes.paragraph):
line_text = self.process_paragraph(str(child))
self.data['description'] += '\t* %s\n' % line_text
def unknown_visit(self, node):
pass
def depart_document(self, node):
"""
End of parsing extra processing
"""
default_methods = ['GET', 'HEAD', 'OPTIONS']
# ensure urls info is present and set default http methods
if not self.data['urls']:
for url in self.urls:
self.data['urls'].append({'rule': url,
'methods': default_methods})
def unknown_departure(self, node):
pass
def _parse_httpdomain_doc(doc, data):
doc_lines = doc.split('\n')
doc_lines_filtered = []
urls = []
# httpdomain is a sphinx extension that is unknown to docutils but
# fortunately we can still parse its directives' content,
# so remove lines with httpdomain directives before executing the
# rst parser from docutils
for doc_line in doc_lines:
if '.. http' not in doc_line:
doc_lines_filtered.append(doc_line)
else:
url = doc_line[doc_line.find('/'):]
# emphasize url arguments for html rendering
url = re.sub(r'\((\w+)\)', r' **\(\1\)** ', url)
urls.append(url)
# parse the rst doctring and do not print system messages about
# unknown httpdomain roles
document = parse_rst('\n'.join(doc_lines_filtered), report_level=5)
# remove the system_message nodes from the parsed document
for node in document.traverse(docutils.nodes.system_message):
node.parent.remove(node)
# visit the document nodes to extract relevant endpoint info
visitor = _HTTPDomainDocVisitor(document, urls, data)
document.walkabout(visitor)
class APIDocException(Exception):
"""
Custom exception to signal errors in the use of the APIDoc decorators
"""
class api_doc(object): # noqa: N801
"""
Decorate an API function to register it in the API doc route index
and create the corresponding DRF route.
Args:
route (str): documentation page's route
noargs (boolean): set to True if the route has no arguments, and its
result should be displayed anytime its documentation
is requested. Default to False
tags (list): Further information on api endpoints. Two values are
possibly expected:
* hidden: remove the entry points from the listing
* upcoming: display the entry point but it is not followable
handle_response (boolean): indicate if the decorated function takes
care of creating the HTTP response or delegates that task to the
apiresponse module
api_version (str): api version string
"""
def __init__(self, route, noargs=False, tags=[], handle_response=False,
api_version='1'):
super().__init__()
self.route = route
self.urlpattern = '^' + api_version + route + '$'
self.noargs = noargs
self.tags = set(tags)
self.handle_response = handle_response
# @api_doc() Decorator call
def __call__(self, f):
# If the route is not hidden, add it to the index
if 'hidden' not in self.tags:
doc_data = self.get_doc_data(f)
doc_desc = doc_data['description']
first_dot_pos = doc_desc.find('.')
APIUrls.add_route(self.route, doc_desc[:first_dot_pos+1],
tags=self.tags)
# If the decorated route has arguments, we create a specific
# documentation view
if not self.noargs:
@api_view(['GET', 'HEAD'])
def doc_view(request):
doc_data = self.get_doc_data(f)
return make_api_response(request, None, doc_data)
- view_name = self.route[1:-1].replace('/', '-')
+ view_name = 'api-%s' % self.route[1:-1].replace('/', '-')
APIUrls.add_url_pattern(self.urlpattern, doc_view, view_name)
@wraps(f)
def documented_view(request, **kwargs):
doc_data = self.get_doc_data(f)
try:
response = f(request, **kwargs)
except Exception as exc:
return error_response(request, exc, doc_data)
if self.handle_response:
return response
else:
return make_api_response(request, response, doc_data)
return documented_view
@functools.lru_cache(maxsize=32)
def get_doc_data(self, f):
"""
Build documentation data for the decorated api endpoint function
"""
data = {
'description': '',
'response_data': None,
'urls': [],
'args': [],
'params': [],
'resheaders': [],
'reqheaders': [],
'return_type': '',
'returns': [],
'status_codes': [],
'examples': [],
'route': self.route,
'noargs': self.noargs
}
if not f.__doc__:
raise APIDocException('apidoc %s: expected a docstring'
' for function %s'
% (self.__class__.__name__, f.__name__))
# use raw docstring as endpoint documentation if sphinx
# httpdomain is not used
if '.. http' not in f.__doc__:
data['description'] = f.__doc__
# else parse the sphinx httpdomain docstring with docutils
# (except when building the swh-web documentation through autodoc
# sphinx extension, not needed and raise errors with sphinx >= 1.7)
elif 'SWH_WEB_DOC_BUILD' not in os.environ:
_parse_httpdomain_doc(f.__doc__, data)
# process returned object info for nicer html display
returns_list = ''
for ret in data['returns']:
returns_list += '\t* **%s (%s)**: %s\n' %\
(ret['name'], ret['type'], ret['doc'])
data['returns_list'] = returns_list
return data
diff --git a/swh/web/api/apiurls.py b/swh/web/api/apiurls.py
index 1a3f1a905..2b661e709 100644
--- a/swh/web/api/apiurls.py
+++ b/swh/web/api/apiurls.py
@@ -1,79 +1,79 @@
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework.decorators import api_view
from swh.web.common.urlsindex import UrlsIndex
from swh.web.common.throttling import throttle_scope
class APIUrls(UrlsIndex):
"""
Class to manage API documentation URLs.
- Indexes all routes documented using apidoc's decorators.
- Tracks endpoint/request processing method relationships for use in
generating related urls in API documentation
"""
_apidoc_routes = {}
_method_endpoints = {}
scope = 'api'
@classmethod
def get_app_endpoints(cls):
return cls._apidoc_routes
@classmethod
def add_route(cls, route, docstring, **kwargs):
"""
Add a route to the self-documenting API reference
"""
- route_view_name = route[1:-1].replace('/', '-')
+ route_view_name = 'api-%s' % route[1:-1].replace('/', '-')
if route not in cls._apidoc_routes:
d = {'docstring': docstring,
'route_view_name': route_view_name}
for k, v in kwargs.items():
d[k] = v
cls._apidoc_routes[route] = d
class api_route(object): # noqa: N801
"""
Decorator to ease the registration of an API endpoint
using the Django REST Framework.
Args:
url_pattern: the url pattern used by DRF to identify the API route
view_name: the name of the API view associated to the route used to
reverse the url
methods: array of HTTP methods supported by the API route
"""
def __init__(self, url_pattern=None, view_name=None,
methods=['GET', 'HEAD', 'OPTIONS'],
throttle_scope='swh_api',
api_version='1'):
super().__init__()
self.url_pattern = '^' + api_version + url_pattern + '$'
self.view_name = view_name
self.methods = methods
self.throttle_scope = throttle_scope
def __call__(self, f):
# create a DRF view from the wrapped function
@api_view(self.methods)
@throttle_scope(self.throttle_scope)
def api_view_f(*args, **kwargs):
return f(*args, **kwargs)
# small hacks for correctly generating API endpoints index doc
api_view_f.__name__ = f.__name__
api_view_f.http_method_names = self.methods
# register the route and its view in the endpoints index
APIUrls.add_url_pattern(self.url_pattern, api_view_f,
self.view_name)
return f
diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py
index 09f3cbdfe..39129d606 100644
--- a/swh/web/api/utils.py
+++ b/swh/web/api/utils.py
@@ -1,306 +1,311 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common.utils import reverse
from swh.web.common.query import parse_hash
def filter_field_keys(data, field_keys):
"""Given an object instance (directory or list), and a csv field keys
to filter on.
Return the object instance with filtered keys.
Note: Returns obj as is if it's an instance of types not in (dictionary,
list)
Args:
- data: one object (dictionary, list...) to filter.
- field_keys: csv or set of keys to filter the object on
Returns:
obj filtered on field_keys
"""
if isinstance(data, map):
return map(lambda x: filter_field_keys(x, field_keys), data)
if isinstance(data, list):
return [filter_field_keys(x, field_keys) for x in data]
if isinstance(data, dict):
return {k: v for (k, v) in data.items() if k in field_keys}
return data
def person_to_string(person):
"""Map a person (person, committer, tagger, etc...) to a string.
"""
return ''.join([person['name'], ' <', person['email'], '>'])
def enrich_object(object):
"""Enrich an object (revision, release) with link to the 'target' of
type 'target_type'.
Args:
object: An object with target and target_type keys
(e.g. release, revision)
Returns:
Object enriched with target_url pointing to the right
swh.web.ui.api urls for the pointing object (revision,
release, content, directory)
"""
obj = object.copy()
if 'target' in obj and 'target_type' in obj:
if obj['target_type'] == 'revision':
- obj['target_url'] = reverse('revision',
+ obj['target_url'] = reverse('api-revision',
kwargs={'sha1_git': obj['target']})
elif obj['target_type'] == 'release':
- obj['target_url'] = reverse('release',
+ obj['target_url'] = reverse('api-release',
kwargs={'sha1_git': obj['target']})
elif obj['target_type'] == 'content':
obj['target_url'] = \
- reverse('content', kwargs={'q': 'sha1_git:' + obj['target']})
+ reverse('api-content',
+ kwargs={'q': 'sha1_git:' + obj['target']})
elif obj['target_type'] == 'directory':
- obj['target_url'] = reverse('directory',
+ obj['target_url'] = reverse('api-directory',
kwargs={'sha1_git': obj['target']})
if 'author' in obj:
author = obj['author']
- obj['author_url'] = reverse('person',
+ obj['author_url'] = reverse('api-person',
kwargs={'person_id': author['id']})
return obj
enrich_release = enrich_object
def enrich_directory(directory, context_url=None):
"""Enrich directory with url to content or directory.
"""
if 'type' in directory:
target_type = directory['type']
target = directory['target']
if target_type == 'file':
directory['target_url'] = \
- reverse('content', kwargs={'q': 'sha1_git:%s' % target})
+ reverse('api-content', kwargs={'q': 'sha1_git:%s' % target})
if context_url:
directory['file_url'] = context_url + directory['name'] + '/'
elif target_type == 'dir':
- directory['target_url'] = reverse('directory',
+ directory['target_url'] = reverse('api-directory',
kwargs={'sha1_git': target})
if context_url:
directory['dir_url'] = context_url + directory['name'] + '/'
else:
- directory['target_url'] = reverse('revision',
+ directory['target_url'] = reverse('api-revision',
kwargs={'sha1_git': target})
if context_url:
directory['rev_url'] = context_url + directory['name'] + '/'
return directory
def enrich_metadata_endpoint(content):
"""Enrich metadata endpoint with link to the upper metadata endpoint.
"""
c = content.copy()
- c['content_url'] = reverse('content', args=['sha1:%s' % c['id']])
+ c['content_url'] = reverse('api-content', args=['sha1:%s' % c['id']])
return c
def enrich_content(content, top_url=False, query_string=None):
"""Enrich content with links to:
- data_url: its raw data
- filetype_url: its filetype information
- language_url: its programming language information
- license_url: its licensing information
Args:
content: dict of data associated to a swh content object
top_url: whether or not to include the content url in
the enriched data
query_string: optional query string of type '<hash_algo>:<hash>'
used when requesting the content, it acts as a hint
for picking the same hash method when computing
the url listed above
Returns:
An enriched content dict filled with additional urls
"""
checksums = content
if 'checksums' in content:
checksums = content['checksums']
hash_algo = 'sha1'
if query_string:
hash_algo = parse_hash(query_string)[0]
if hash_algo in checksums:
q = '%s:%s' % (hash_algo, checksums[hash_algo])
if top_url:
- content['content_url'] = reverse('content', kwargs={'q': q})
- content['data_url'] = reverse('content-raw', kwargs={'q': q})
- content['filetype_url'] = reverse('content-filetype',
+ content['content_url'] = reverse('api-content', kwargs={'q': q})
+ content['data_url'] = reverse('api-content-raw', kwargs={'q': q})
+ content['filetype_url'] = reverse('api-content-filetype',
kwargs={'q': q})
- content['language_url'] = reverse('content-language',
+ content['language_url'] = reverse('api-content-language',
kwargs={'q': q})
- content['license_url'] = reverse('content-license',
+ content['license_url'] = reverse('api-content-license',
kwargs={'q': q})
return content
def enrich_entity(entity):
"""Enrich entity with
"""
if 'uuid' in entity:
- entity['uuid_url'] = reverse('entity',
+ entity['uuid_url'] = reverse('api-entity',
kwargs={'uuid': entity['uuid']})
if 'parent' in entity and entity['parent']:
- entity['parent_url'] = reverse('entity',
+ entity['parent_url'] = reverse('api-entity',
kwargs={'uuid': entity['parent']})
return entity
def _get_path_list(path_string):
"""Helper for enrich_revision: get a list of the sha1 id of the navigation
breadcrumbs, ordered from the oldest to the most recent.
Args:
path_string: the path as a '/'-separated string
Returns:
The navigation context as a list of sha1 revision ids
"""
return path_string.split('/')
def _get_revision_contexts(rev_id, context):
"""Helper for enrich_revision: retrieve for the revision id and potentially
the navigation breadcrumbs the context to pass to parents and children of
of the revision.
Args:
rev_id: the revision's sha1 id
context: the current navigation context
Returns:
The context for parents, children and the url of the direct child as a
tuple in that order.
"""
context_for_parents = None
context_for_children = None
url_direct_child = None
if not context:
return (rev_id, None, None)
path_list = _get_path_list(context)
context_for_parents = '%s/%s' % (context, rev_id)
prev_for_children = path_list[:-1]
if len(prev_for_children) > 0:
context_for_children = '/'.join(prev_for_children)
child_id = path_list[-1]
# This commit is not the first commit in the path
if context_for_children:
- url_direct_child = reverse('revision-context',
+ url_direct_child = reverse('api-revision-context',
kwargs={'sha1_git': child_id,
'context': context_for_children})
# This commit is the first commit in the path
else:
- url_direct_child = reverse('revision', kwargs={'sha1_git': child_id})
+ url_direct_child = reverse('api-revision',
+ kwargs={'sha1_git': child_id})
return (context_for_parents, context_for_children, url_direct_child)
def _make_child_url(rev_children, context):
"""Helper for enrich_revision: retrieve the list of urls corresponding
to the children of the current revision according to the navigation
breadcrumbs.
Args:
rev_children: a list of revision id
context: the '/'-separated navigation breadcrumbs
Returns:
the list of the children urls according to the context
"""
children = []
for child in rev_children:
if context and child != _get_path_list(context)[-1]:
- children.append(reverse('revision',
+ children.append(reverse('api-revision',
kwargs={'sha1_git': child}))
elif not context:
- children.append(reverse('revision', kwargs={'sha1_git': child}))
+ children.append(reverse('api-revision',
+ kwargs={'sha1_git': child}))
return children
def enrich_revision(revision, context=None):
"""Enrich revision with links where it makes sense (directory, parents).
Keep track of the navigation breadcrumbs if they are specified.
Args:
revision: the revision as a dict
context: the navigation breadcrumbs as a /-separated string of revision
sha1_git
"""
ctx_parents, ctx_children, url_direct_child = _get_revision_contexts(
revision['id'], context)
- revision['url'] = reverse('revision', kwargs={'sha1_git': revision['id']})
- revision['history_url'] = reverse('revision-log',
+ revision['url'] = reverse('api-revision',
+ kwargs={'sha1_git': revision['id']})
+ revision['history_url'] = reverse('api-revision-log',
kwargs={'sha1_git': revision['id']})
if context:
revision['history_context_url'] = reverse(
- 'revision-log', kwargs={'sha1_git': revision['id'],
- 'prev_sha1s': context})
+ 'api-revision-log', kwargs={'sha1_git': revision['id'],
+ 'prev_sha1s': context})
if 'author' in revision:
author = revision['author']
- revision['author_url'] = reverse('person',
+ revision['author_url'] = reverse('api-person',
kwargs={'person_id': author['id']})
if 'committer' in revision:
committer = revision['committer']
revision['committer_url'] = \
- reverse('person', kwargs={'person_id': committer['id']})
+ reverse('api-person', kwargs={'person_id': committer['id']})
if 'directory' in revision:
revision['directory_url'] = \
- reverse('directory', kwargs={'sha1_git': revision['directory']})
+ reverse('api-directory',
+ kwargs={'sha1_git': revision['directory']})
if 'parents' in revision:
parents = []
for parent in revision['parents']:
parents.append({
'id': parent,
- 'url': reverse('revision', kwargs={'sha1_git': parent})
+ 'url': reverse('api-revision', kwargs={'sha1_git': parent})
})
revision['parents'] = parents
if 'children' in revision:
children = _make_child_url(revision['children'], context)
if url_direct_child:
children.append(url_direct_child)
revision['children_urls'] = children
else:
if url_direct_child:
revision['children_urls'] = [url_direct_child]
if 'message_decoding_failed' in revision:
- revision['message_url'] = reverse('revision-raw-message',
+ revision['message_url'] = reverse('api-revision-raw-message',
kwargs={'sha1_git': revision['id']})
return revision
diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py
index 7cd775abf..f3db561b2 100644
--- a/swh/web/api/views/content.py
+++ b/swh/web/api/views/content.py
@@ -1,411 +1,413 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import functools
from django.http import HttpResponse
from swh.web.common import service
from swh.web.common.utils import reverse
from swh.web.common.exc import NotFoundExc, ForbiddenExc
from swh.web.api.apidoc import api_doc
from swh.web.api import utils
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
-@api_route(r'/content/(?P<q>.+)/provenance/', 'content-provenance')
+@api_route(r'/content/(?P<q>.+)/provenance/', 'api-content-provenance')
@api_doc('/content/provenance/', tags=['hidden'])
def api_content_provenance(request, q):
"""Return content's provenance information if any.
"""
def _enrich_revision(provenance):
p = provenance.copy()
p['revision_url'] = \
- reverse('revision', kwargs={'sha1_git': provenance['revision']})
+ reverse('api-revision',
+ kwargs={'sha1_git': provenance['revision']})
p['content_url'] = \
- reverse('content',
+ reverse('api-content',
kwargs={'q': 'sha1_git:%s' % provenance['content']})
p['origin_url'] = \
- reverse('origin', kwargs={'origin_id': provenance['origin']})
+ reverse('api-origin', kwargs={'origin_id': provenance['origin']})
p['origin_visits_url'] = \
- reverse('origin-visits',
+ reverse('api-origin-visits',
kwargs={'origin_id': provenance['origin']})
p['origin_visit_url'] = \
- reverse('origin-visit', kwargs={'origin_id': provenance['origin'],
- 'visit_id': provenance['visit']})
+ reverse('api-origin-visit',
+ kwargs={'origin_id': provenance['origin'],
+ 'visit_id': provenance['visit']})
return p
return api_lookup(
service.lookup_content_provenance, q,
notfound_msg='Content with {} not found.'.format(q),
enrich_fn=_enrich_revision)
-@api_route(r'/content/(?P<q>.+)/filetype/', 'content-filetype')
+@api_route(r'/content/(?P<q>.+)/filetype/', 'api-content-filetype')
@api_doc('/content/filetype/')
def api_content_filetype(request, q):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/filetype/
Get information about the detected MIME type of a content object.
:param string hash_type: optional parameter specifying which hashing algorithm has been used
to compute the content checksum. It can be either *sha1*, *sha1_git*, *sha256*
or *blake2s256*. If that parameter is not provided, it is assumed that the
hashing algorithm used is *sha1*.
:param string hash: hexadecimal representation of the checksum value computed with
the specified hashing algorithm.
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for
getting information about the content
:>json string encoding: the detected content encoding
:>json string id: the *sha1* identifier of the content
:>json string mimetype: the detected MIME type of the content
:>json object tool: information about the tool used to detect the content filetype
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *hash_type* or *hash* has been provided
:statuscode 404: requested content can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/`
""" # noqa
return api_lookup(
service.lookup_content_filetype, q,
notfound_msg='No filetype information found for content {}.'.format(q),
enrich_fn=utils.enrich_metadata_endpoint)
-@api_route(r'/content/(?P<q>.+)/language/', 'content-language')
+@api_route(r'/content/(?P<q>.+)/language/', 'api-content-language')
@api_doc('/content/language/')
def api_content_language(request, q):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/language/
Get information about the programming language used in a content object.
:param string hash_type: optional parameter specifying which hashing algorithm has been used
to compute the content checksum. It can be either *sha1*, *sha1_git*, *sha256*
or *blake2s256*. If that parameter is not provided, it is assumed that the
hashing algorithm used is *sha1*.
:param string hash: hexadecimal representation of the checksum value computed with
the specified hashing algorithm.
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for
getting information about the content
:>json string id: the *sha1* identifier of the content
:>json string lang: the detected programming language if any
:>json object tool: information about the tool used to detect the programming language
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *hash_type* or *hash* has been provided
:statuscode 404: requested content can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/`
""" # noqa
return api_lookup(
service.lookup_content_language, q,
notfound_msg='No language information found for content {}.'.format(q),
enrich_fn=utils.enrich_metadata_endpoint)
-@api_route(r'/content/(?P<q>.+)/license/', 'content-license')
+@api_route(r'/content/(?P<q>.+)/license/', 'api-content-license')
@api_doc('/content/license/')
def api_content_license(request, q):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/license/
Get information about the license of a content object.
:param string hash_type: optional parameter specifying which hashing algorithm has been used
to compute the content checksum. It can be either *sha1*, *sha1_git*, *sha256*
or *blake2s256*. If that parameter is not provided, it is assumed that the
hashing algorithm used is *sha1*.
:param string hash: hexadecimal representation of the checksum value computed with
the specified hashing algorithm.
:>json object content_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/` for
getting information about the content
:>json string id: the *sha1* identifier of the content
:>json array licenses: array of strings containing the detected license names if any
:>json object tool: information about the tool used to detect the license
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *hash_type* or *hash* has been provided
:statuscode 404: requested content can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/`
""" # noqa
return api_lookup(
service.lookup_content_license, q,
notfound_msg='No license information found for content {}.'.format(q),
enrich_fn=utils.enrich_metadata_endpoint)
-@api_route(r'/content/(?P<q>.+)/ctags/', 'content-ctags')
+@api_route(r'/content/(?P<q>.+)/ctags/', 'api-content-ctags')
@api_doc('/content/ctags/', tags=['hidden'])
def api_content_ctags(request, q):
"""
Get information about all `Ctags <http://ctags.sourceforge.net/>`_-style
symbols defined in a content object.
"""
return api_lookup(
service.lookup_content_ctags, q,
notfound_msg='No ctags symbol found for content {}.'.format(q),
enrich_fn=utils.enrich_metadata_endpoint)
-@api_route(r'/content/(?P<q>.+)/raw/', 'content-raw')
+@api_route(r'/content/(?P<q>.+)/raw/', 'api-content-raw')
@api_doc('/content/raw/', handle_response=True)
def api_content_raw(request, q):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/raw/
Get the raw content of a content object (aka a "blob"), as a byte sequence.
:param string hash_type: optional parameter specifying which hashing algorithm has been used
to compute the content checksum. It can be either *sha1*, *sha1_git*, *sha256*
or *blake2s256*. If that parameter is not provided, it is assumed that the
hashing algorithm used is *sha1*.
:param string hash: hexadecimal representation of the checksum value computed with
the specified hashing algorithm.
:query string filename: if provided, the downloaded content will get that filename
:resheader Content-Type: application/octet-stream
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *hash_type* or *hash* has been provided
:statuscode 404: requested content can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
""" # noqa
def generate(content):
yield content['data']
content_raw = service.lookup_content_raw(q)
if not content_raw:
raise NotFoundExc('Content %s is not found.' % q)
content_filetype = service.lookup_content_filetype(q)
if not content_filetype:
raise NotFoundExc('Content %s is not available for download.' % q)
mimetype = content_filetype['mimetype']
if 'text/' not in mimetype:
raise ForbiddenExc('Only textual content is available for download. '
'Actual content mimetype is %s.' % mimetype)
filename = request.query_params.get('filename')
if not filename:
filename = 'content_%s_raw' % q.replace(':', '_')
response = HttpResponse(generate(content_raw),
content_type='application/octet-stream')
response['Content-disposition'] = 'attachment; filename=%s' % filename
return response
-@api_route(r'/content/symbol/(?P<q>.+)/', 'content-symbol')
+@api_route(r'/content/symbol/(?P<q>.+)/', 'api-content-symbol')
@api_doc('/content/symbol/', tags=['hidden'])
def api_content_symbol(request, q=None):
"""Search content objects by `Ctags `_-style
symbol (e.g., function name, data type, method, ...).
"""
result = {}
last_sha1 = request.query_params.get('last_sha1', None)
per_page = int(request.query_params.get('per_page', '10'))
def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page):
return service.lookup_expression(exp, last_sha1, per_page)
symbols = api_lookup(
lookup_exp, q,
notfound_msg="No indexed raw content match expression '{}'.".format(q),
enrich_fn=functools.partial(utils.enrich_content, top_url=True))
if symbols:
nb_symbols = len(symbols)
if nb_symbols == per_page:
query_params = {}
new_last_sha1 = symbols[-1]['sha1']
query_params['last_sha1'] = new_last_sha1
if request.query_params.get('per_page'):
query_params['per_page'] = per_page
result['headers'] = {
- 'link-next': reverse('content-symbol', kwargs={'q': q},
+ 'link-next': reverse('api-content-symbol', kwargs={'q': q},
query_params=query_params)
}
result.update({
'results': symbols
})
return result
-@api_route(r'/content/known/search/', 'content-known', methods=['POST'])
-@api_route(r'/content/known/(?P<q>(?!search).*)/', 'content-known')
+@api_route(r'/content/known/search/', 'api-content-known', methods=['POST'])
+@api_route(r'/content/known/(?P<q>(?!search).*)/', 'api-content-known')
@api_doc('/content/known/', tags=['hidden'])
def api_check_content_known(request, q=None):
"""
.. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/
Check whether some content(s) (aka "blob(s)") is present in the SWH archive
based on its *sha1* checksum.
:param string sha1: hexadecimal representation of the *sha1* checksum value
for the content to check existence. Multiple values can be provided separated
by ','.
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json array search_res: array holding the search result for each provided *sha1*
:>json object search_stats: some statistics regarding the number of *sha1* provided
and the percentage of those found in the SWH archive
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *sha1* has been provided
**Example:**
.. parsed-literal::
:swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/`
""" # noqa
response = {'search_res': None,
'search_stats': None}
search_stats = {'nbfiles': 0, 'pct': 0}
search_res = None
queries = []
# GET: Many hash separated values request
if q:
hashes = q.split(',')
for v in hashes:
queries.append({'filename': None, 'sha1': v})
# POST: Many hash requests in post form submission
elif request.method == 'POST':
data = request.data
# Remove potential inputs with no associated value
for k, v in data.items():
if v is not None:
if k == 'q' and len(v) > 0:
queries.append({'filename': None, 'sha1': v})
elif v != '':
queries.append({'filename': k, 'sha1': v})
if queries:
lookup = service.lookup_multiple_hashes(queries)
result = []
nb_queries = len(queries)
for el in lookup:
res_d = {'sha1': el['sha1'],
'found': el['found']}
if 'filename' in el and el['filename']:
res_d['filename'] = el['filename']
result.append(res_d)
search_res = result
nbfound = len([x for x in lookup if x['found']])
search_stats['nbfiles'] = nb_queries
search_stats['pct'] = (nbfound / nb_queries) * 100
response['search_res'] = search_res
response['search_stats'] = search_stats
return response
-@api_route(r'/content/(?P<q>.+)/', 'content')
+@api_route(r'/content/(?P<q>.+)/', 'api-content')
@api_doc('/content/')
def api_content_metadata(request, q):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/
Get information about a content (aka a "blob") object.
In the SWH archive, a content object is identified based on checksum
values computed using various hashing algorithms.
:param string hash_type: optional parameter specifying which hashing algorithm has been used
to compute the content checksum. It can be either *sha1*, *sha1_git*, *sha256*
or *blake2s256*. If that parameter is not provided, it is assumed that the
hashing algorithm used is *sha1*.
:param string hash: hexadecimal representation of the checksum value computed with
the specified hashing algorithm.
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json object checksums: object holding the computed checksum values for the requested content
:>json string data_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/raw/`
for downloading the content raw bytes
:>json string filetype_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/filetype/`
for getting information about the content MIME type
:>json string language_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/language/`
for getting information about the programming language used in the content
:>json number length: length of the content in bytes
:>json string license_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/license/`
for getting information about the license of the content
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *hash_type* or *hash* has been provided
:statuscode 404: requested content can not be found in the SWH archive
**Example:**
.. parsed-literal::
curl -i :swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/`
""" # noqa
return api_lookup(
service.lookup_content, q,
notfound_msg='Content with {} not found.'.format(q),
enrich_fn=functools.partial(utils.enrich_content, query_string=q))
diff --git a/swh/web/api/views/directory.py b/swh/web/api/views/directory.py
index 5eeb3f976..a6dbe2b9e 100644
--- a/swh/web/api/views/directory.py
+++ b/swh/web/api/views/directory.py
@@ -1,74 +1,75 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common import service
from swh.web.api import utils
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
-@api_route(r'/directory/(?P<sha1_git>[0-9a-f]+)/', 'directory')
-@api_route(r'/directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/', 'directory')
+@api_route(r'/directory/(?P<sha1_git>[0-9a-f]+)/', 'api-directory')
+@api_route(r'/directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/',
+           'api-directory')
@api_doc('/directory/')
def api_directory(request, sha1_git, path=None):
"""
.. http:get:: /api/1/directory/(sha1_git)/[(path)/]
Get information about directory objects.
Directories are identified by *sha1* checksums, compatible with Git directory identifiers.
See :func:`swh.model.identifiers.directory_identifier` in our data model module for details
about how they are computed.
When given only a directory identifier, this endpoint returns information about the directory itself,
returning its content (usually a list of directory entries). When given a directory identifier and a
path, this endpoint returns information about the directory entry pointed by the relative path,
starting path resolution from the given directory.
:param string sha1_git: hexadecimal representation of the directory *sha1_git* identifier
:param string path: optional parameter to get information about the directory entry
pointed by that relative path
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>jsonarr object checksums: object holding the computed checksum values for a directory entry
(only for file entries)
:>jsonarr string dir_id: *sha1_git* identifier of the requested directory
:>jsonarr number length: length of a directory entry in bytes (only for file entries)
for getting information about the content MIME type
:>jsonarr string name: the directory entry name
:>jsonarr number perms: permissions for the directory entry
:>jsonarr string target: *sha1_git* identifier of the directory entry
:>jsonarr string target_url: link to :http:get:`/api/1/content/[(hash_type):](hash)/`
or :http:get:`/api/1/directory/(sha1_git)/[(path)/]` depending on the directory entry type
:>jsonarr string type: the type of the directory entry, can be either *dir*, *file* or *rev*
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *hash_type* or *hash* has been provided
:statuscode 404: requested directory can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`directory/977fc4b98c0e85816348cebd3b12026407c368b6/`
""" # noqa
if path:
error_msg_path = ('Entry with path %s relative to directory '
'with sha1_git %s not found.') % (path, sha1_git)
return api_lookup(
service.lookup_directory_with_path, sha1_git, path,
notfound_msg=error_msg_path,
enrich_fn=utils.enrich_directory)
else:
error_msg_nopath = 'Directory with sha1_git %s not found.' % sha1_git
return api_lookup(
service.lookup_directory, sha1_git,
notfound_msg=error_msg_nopath,
enrich_fn=utils.enrich_directory)
diff --git a/swh/web/api/views/entity.py b/swh/web/api/views/entity.py
index c459cb966..53d8b9655 100644
--- a/swh/web/api/views/entity.py
+++ b/swh/web/api/views/entity.py
@@ -1,22 +1,22 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common import service
from swh.web.api import utils
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
-@api_route(r'/entity/(?P<uuid>.+)/', 'entity')
+@api_route(r'/entity/(?P<uuid>.+)/', 'api-entity')
@api_doc('/entity/', tags=['hidden'])
def api_entity_by_uuid(request, uuid):
"""Return content information if content is found.
"""
return api_lookup(
service.lookup_entity_by_uuid, uuid,
notfound_msg="Entity with uuid '%s' not found." % uuid,
enrich_fn=utils.enrich_entity)
diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py
index 7088fe327..a1499b957 100644
--- a/swh/web/api/views/identifiers.py
+++ b/swh/web/api/views/identifiers.py
@@ -1,77 +1,77 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.model.identifiers import (
CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
)
from swh.web.common import service
from swh.web.common.utils import resolve_swh_persistent_id
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
@api_route(r'/resolve/(?P<swh_id>.*)/',
- 'resolve-swh-pid')
+ 'api-resolve-swh-pid')
@api_doc('/resolve/')
def api_resolve_swh_pid(request, swh_id):
"""
.. http:get:: /api/1/resolve/(swh_id)/
Resolve a Software Heritage persistent identifier.
Try to resolve a provided `persistent identifier <https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>`_
into an url for browsing the pointed archive object. If the provided
identifier is valid, the existence of the object in the archive
will also be checked.
:param string swh_id: a SWH persistent identifier
:>json string browse_url: the url for browsing the pointed object
:>json object metadata: object holding optional parts of the persistent identifier
:>json string namespace: the persistent identifier namespace
:>json string object_id: the hash identifier of the pointed object
:>json string object_type: the type of the pointed object
:>json number scheme_version: the scheme version of the persistent identifier
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid persistent identifier has been provided
:statuscode 404: the pointed object does not exist in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`resolve/swh:1:rev:96db9023b881d7cd9f379b0c154650d6c108e9a3;origin=https://github.com/openssl/openssl/`
""" # noqa
# try to resolve the provided pid
swh_id_resolved = resolve_swh_persistent_id(swh_id)
# id is well-formed, now check that the pointed
# object is present in the archive, NotFoundExc
# will be raised otherwise
swh_id_parsed = swh_id_resolved['swh_id_parsed']
object_type = swh_id_parsed.object_type
object_id = swh_id_parsed.object_id
if object_type == CONTENT:
service.lookup_content('sha1_git:%s' % object_id)
elif object_type == DIRECTORY:
service.lookup_directory(object_id)
elif object_type == RELEASE:
service.lookup_release(object_id)
elif object_type == REVISION:
service.lookup_revision(object_id)
elif object_type == SNAPSHOT:
service.lookup_snapshot(object_id)
# id is well-formed and the pointed object exists
swh_id_data = swh_id_parsed._asdict()
swh_id_data['browse_url'] = swh_id_resolved['browse_url']
return swh_id_data
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
index eab40f05d..56e95eabe 100644
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -1,330 +1,330 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from distutils.util import strtobool
from swh.web.common import service
from swh.web.common.utils import (
reverse, get_origin_visits
)
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
def _enrich_origin(origin):
if 'id' in origin:
o = origin.copy()
o['origin_visits_url'] = \
- reverse('origin-visits', kwargs={'origin_id': origin['id']})
+ reverse('api-origin-visits', kwargs={'origin_id': origin['id']})
return o
return origin
-@api_route(r'/origin/(?P<origin_id>[0-9]+)/', 'origin')
+@api_route(r'/origin/(?P<origin_id>[0-9]+)/', 'api-origin')
@api_route(r'/origin/(?P<origin_type>[a-z]+)/url/(?P<origin_url>.+)',
-           'origin')
+           'api-origin')
@api_doc('/origin/')
def api_origin(request, origin_id=None, origin_type=None, origin_url=None):
"""
.. http:get:: /api/1/origin/(origin_id)/
Get information about a software origin.
:param int origin_id: a SWH origin identifier
:>json number id: the origin unique identifier
:>json string origin_visits_url: link to :http:get:`/api/1/origin/(origin_id)/visits/`
in order to get information about the SWH visits for that origin
:>json string type: the type of software origin (*git*, *svn*, *hg*, *deb*, *ftp*, ...)
:>json string url: the origin canonical url
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: requested origin can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/1/`
.. http:get:: /api/1/origin/(origin_type)/url/(origin_url)/
Get information about a software origin.
:param string origin_type: the origin type (*git*, *svn*, *hg*, *deb*, *ftp*, ...)
:param string origin_url: the origin url
:>json number id: the origin unique identifier
:>json string origin_visits_url: link to :http:get:`/api/1/origin/(origin_id)/visits/`
in order to get information about the SWH visits for that origin
:>json string type: the type of software origin (*git*, *svn*, *hg*, *deb*, *ftp*, ...)
:>json string url: the origin canonical url
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: requested origin can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/git/url/https://github.com/python/cpython/`
""" # noqa
ori_dict = {
'id': origin_id,
'type': origin_type,
'url': origin_url
}
ori_dict = {k: v for k, v in ori_dict.items() if ori_dict[k]}
if 'id' in ori_dict:
error_msg = 'Origin with id %s not found.' % ori_dict['id']
else:
error_msg = 'Origin with type %s and URL %s not found' % (
ori_dict['type'], ori_dict['url'])
return api_lookup(
service.lookup_origin, ori_dict,
notfound_msg=error_msg,
enrich_fn=_enrich_origin)
@api_route(r'/origin/search/(?P<url_pattern>.+)/',
- 'origin-search')
+ 'api-origin-search')
@api_doc('/origin/search/')
def api_origin_search(request, url_pattern):
"""
.. http:get:: /api/1/origin/search/(url_pattern)/
Search for software origins whose urls contain a provided string
pattern or match a provided regular expression.
The search is performed in a case insensitive way.
:param string url_pattern: a string pattern or a regular expression
:query int offset: the number of found origins to skip before returning results
:query int limit: the maximum number of found origins to return
:query boolean regexp: if true, consider provided pattern as a regular expression
and search origins whose urls match it
:query boolean with_visit: if true, only return origins with at least one visit
by Software heritage
:>jsonarr number id: the origin unique identifier
:>jsonarr string origin_visits_url: link to :http:get:`/api/1/origin/(origin_id)/visits/`
in order to get information about the SWH visits for that origin
:>jsonarr string type: the type of software origin (*git*, *svn*, *hg*, *deb*, *ftp*, ...)
:>jsonarr string url: the origin canonical url
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
**Example:**
.. parsed-literal::
:swh_web_api:`origin/search/python/?limit=2`
""" # noqa
result = {}
offset = int(request.query_params.get('offset', '0'))
limit = int(request.query_params.get('limit', '70'))
regexp = request.query_params.get('regexp', 'false')
with_visit = request.query_params.get('with_visit', 'false')
results = api_lookup(service.search_origin, url_pattern, offset, limit,
bool(strtobool(regexp)), bool(strtobool(with_visit)),
enrich_fn=_enrich_origin)
nb_results = len(results)
if nb_results == limit:
query_params = {}
query_params['offset'] = offset + limit
query_params['limit'] = limit
query_params['regexp'] = regexp
result['headers'] = {
- 'link-next': reverse('origin-search',
+ 'link-next': reverse('api-origin-search',
kwargs={'url_pattern': url_pattern},
query_params=query_params)
}
result.update({
'results': results
})
return result
-@api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'origin-visits')
+@api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
@api_doc('/origin/visits/')
def api_origin_visits(request, origin_id):
"""
.. http:get:: /api/1/origin/(origin_id)/visits/
Get information about all visits of a software origin.
Visits are returned sorted in descending order according
to their date.
:param int origin_id: a SWH origin identifier
:query int per_page: specify the number of visits to list, for pagination purposes
:query int last_visit: visit to start listing from, for pagination purposes
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:resheader Link: indicates that a subsequent result page is available and contains
the url pointing to it
:>jsonarr string date: ISO representation of the visit date (in UTC)
:>jsonarr number id: the unique identifier of the origin
:>jsonarr string origin_visit_url: link to :http:get:`/api/1/origin/(origin_id)/visit/(visit_id)/`
in order to get information about the visit
:>jsonarr string snapshot: the snapshot identifier of the visit
:>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/`
in order to get information about the snapshot of the visit
:>jsonarr string status: status of the visit (either *full*, *partial* or *ongoing*)
:>jsonarr number visit: the unique identifier of the visit
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: requested origin can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/1/visits/`
""" # noqa
result = {}
per_page = int(request.query_params.get('per_page', '10'))
last_visit = request.query_params.get('last_visit')
if last_visit:
last_visit = int(last_visit)
def _lookup_origin_visits(
origin_id, last_visit=last_visit, per_page=per_page):
all_visits = get_origin_visits({'id': origin_id})
all_visits.reverse()
visits = []
if not last_visit:
visits = all_visits[:per_page]
else:
for i, v in enumerate(all_visits):
if v['visit'] == last_visit:
visits = all_visits[i+1:i+1+per_page]
break
for v in visits:
yield v
def _enrich_origin_visit(origin_visit):
ov = origin_visit.copy()
- ov['origin_visit_url'] = reverse('origin-visit',
+ ov['origin_visit_url'] = reverse('api-origin-visit',
kwargs={'origin_id': origin_id,
'visit_id': ov['visit']})
snapshot = ov['snapshot']
if snapshot:
- ov['snapshot_url'] = reverse('snapshot',
+ ov['snapshot_url'] = reverse('api-snapshot',
kwargs={'snapshot_id': snapshot})
else:
ov['snapshot_url'] = None
return ov
results = api_lookup(_lookup_origin_visits, origin_id,
notfound_msg='No origin {} found'.format(origin_id),
enrich_fn=_enrich_origin_visit)
if results:
nb_results = len(results)
if nb_results == per_page:
new_last_visit = results[-1]['visit']
query_params = {}
query_params['last_visit'] = new_last_visit
if request.query_params.get('per_page'):
query_params['per_page'] = per_page
result['headers'] = {
- 'link-next': reverse('origin-visits',
+ 'link-next': reverse('api-origin-visits',
kwargs={'origin_id': origin_id},
query_params=query_params)
}
result.update({
'results': results
})
return result
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visit/(?P<visit_id>[0-9]+)/',
- 'origin-visit')
+ 'api-origin-visit')
@api_doc('/origin/visit/')
def api_origin_visit(request, origin_id, visit_id):
"""
.. http:get:: /api/1/origin/(origin_id)/visit/(visit_id)/
Get information about a specific visit of a software origin.
:param int origin_id: a SWH origin identifier
:param int visit_id: a visit identifier
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json string date: ISO representation of the visit date (in UTC)
:>json number origin: the origin unique identifier
:>json string origin_url: link to get information about the origin
:>jsonarr string snapshot: the snapshot identifier of the visit
:>jsonarr string snapshot_url: link to :http:get:`/api/1/snapshot/(snapshot_id)/`
in order to get information about the snapshot of the visit
:>json string status: status of the visit (either *full*, *partial* or *ongoing*)
:>json number visit: the unique identifier of the visit
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: requested origin or visit can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`origin/1500/visit/1/`
""" # noqa
def _enrich_origin_visit(origin_visit):
ov = origin_visit.copy()
- ov['origin_url'] = reverse('origin',
+ ov['origin_url'] = reverse('api-origin',
kwargs={'origin_id': ov['origin']})
snapshot = ov['snapshot']
if snapshot:
- ov['snapshot_url'] = reverse('snapshot',
+ ov['snapshot_url'] = reverse('api-snapshot',
kwargs={'snapshot_id': snapshot})
else:
ov['snapshot_url'] = None
return ov
return api_lookup(
service.lookup_origin_visit, origin_id, visit_id,
notfound_msg=('No visit {} for origin {} found'
.format(visit_id, origin_id)),
enrich_fn=_enrich_origin_visit)
diff --git a/swh/web/api/views/origin_save.py b/swh/web/api/views/origin_save.py
index 1e9cf4ca8..d7b4e41d6 100644
--- a/swh/web/api/views/origin_save.py
+++ b/swh/web/api/views/origin_save.py
@@ -1,79 +1,79 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.views.decorators.cache import never_cache
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.common.origin_save import (
create_save_origin_request, get_save_origin_requests
)
@api_route(r'/origin/save/(?P<origin_type>.+)/url/(?P<origin_url>.+)/',
- 'save-origin', methods=['GET', 'POST'],
+ 'api-save-origin', methods=['GET', 'POST'],
throttle_scope='swh_save_origin')
@never_cache
@api_doc('/origin/save/')
def api_save_origin(request, origin_type, origin_url):
"""
.. http:get:: /api/1/origin/save/(origin_type)/url/(origin_url)/
.. http:post:: /api/1/origin/save/(origin_type)/url/(origin_url)/
Request the saving of a software origin into the archive
or check the status of previously created save requests.
That endpoint enables to create a saving task for a software origin
through a POST request.
Depending of the provided origin url, the save request can either be:
* immediately **accepted**, for well known code hosting providers
like for instance GitHub or GitLab
* **rejected**, in case the url is blacklisted by Software Heritage
* **put in pending state** until a manual check is done in order to
determine if it can be loaded or not
Once a saving request has been accepted, its associated saving task status can
then be checked through a GET request on the same url. Returned status can either be:
* **not created**: no saving task has been created
* **not yet scheduled**: saving task has been created but its execution has not
yet been scheduled
* **scheduled**: the task execution has been scheduled
* **succeed**: the saving task has been successfully executed
* **failed**: the saving task has been executed but it failed
When issuing a POST request an object will be returned while a GET request will
return an array of objects (as multiple save requests might have been submitted
for the same origin).
- :param string origin_type: the type of origin to save
+ :param string origin_type: the type of origin to save
(currently only *git* but *hg* and *svn* will soon be available)
:param string origin_url: the url of the origin to save
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json string origin_url: the url of the origin to save
:>json string origin_type: the type of the origin to save
:>json string save_request_date: the date (in iso format) the save request was issued
:>json string save_request_status: the status of the save request, either *accepted*,
*rejected* or *pending*
:>json string save_task_status: the status of the origin saving task, either *not created*,
*not yet scheduled*, *scheduled*, *succeed* or *failed*
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid origin type or url has been provided
:statuscode 403: the provided origin url is blacklisted
""" # noqa
if request.method == 'POST':
return create_save_origin_request(origin_type, origin_url)
else:
return get_save_origin_requests(origin_type, origin_url)
diff --git a/swh/web/api/views/person.py b/swh/web/api/views/person.py
index 2e7ebe755..0792dace8 100644
--- a/swh/web/api/views/person.py
+++ b/swh/web/api/views/person.py
@@ -1,44 +1,44 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common import service
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
-@api_route(r'/person/(?P<person_id>[0-9]+)/', 'person')
+@api_route(r'/person/(?P<person_id>[0-9]+)/', 'api-person')
@api_doc('/person/')
def api_person(request, person_id):
"""
.. http:get:: /api/1/person/(person_id)/
Get information about a person in the SWH archive.
:param int person_id: a SWH person identifier
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json string email: the email of the person
:>json string fullname: the full name of the person: combination of its name and email
:>json number id: the unique identifier of the person
:>json string name: the name of the person
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: requested person can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`person/8275/`
""" # noqa
return api_lookup(
service.lookup_person, person_id,
notfound_msg='Person with id {} not found.'.format(person_id))
diff --git a/swh/web/api/views/release.py b/swh/web/api/views/release.py
index 80234dd9a..8abfcf8ed 100644
--- a/swh/web/api/views/release.py
+++ b/swh/web/api/views/release.py
@@ -1,58 +1,58 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common import service
from swh.web.api import utils
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
-@api_route(r'/release/(?P<sha1_git>[0-9a-f]+)/', 'release')
+@api_route(r'/release/(?P<sha1_git>[0-9a-f]+)/', 'api-release')
@api_doc('/release/')
def api_release(request, sha1_git):
"""
.. http:get:: /api/1/release/(sha1_git)/
Get information about a release in the SWH archive.
Releases are identified by *sha1* checksums, compatible with Git tag identifiers.
See :func:`swh.model.identifiers.release_identifier` in our data model module for details
about how they are computed.
:param string sha1_git: hexadecimal representation of the release *sha1_git* identifier
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json object author: information about the author of the release
:>json string author_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the author of the release
:>json string date: ISO representation of the release date (in UTC)
:>json string id: the release unique identifier
:>json string message: the message associated to the release
:>json string name: the name of the release
:>json string target: the target identifier of the release
:>json string target_type: the type of the target, can be either *release*,
*revision*, *content*, *directory*
:>json string target_url: a link to the adequate api url based on the target type
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *sha1_git* value has been provided
:statuscode 404: requested release can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`release/208f61cc7a5dbc9879ae6e5c2f95891e270f09ef/`
""" # noqa
error_msg = 'Release with sha1_git %s not found.' % sha1_git
return api_lookup(
service.lookup_release, sha1_git,
notfound_msg=error_msg,
enrich_fn=utils.enrich_release)
diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py
index 6b88589f5..313e90034 100644
--- a/swh/web/api/views/revision.py
+++ b/swh/web/api/views/revision.py
@@ -1,504 +1,505 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.http import HttpResponse
from swh.web.common import service
from swh.web.common.utils import reverse
from swh.web.common.utils import parse_timestamp
from swh.web.api import utils
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
def _revision_directory_by(revision, path, request_path,
limit=100, with_data=False):
"""
Compute the revision matching criterion's directory or content data.
Args:
revision: dictionary of criterions representing a revision to lookup
path: directory's path to lookup
request_path: request path which holds the original context to
limit: optional query parameter to limit the revisions log
(default to 100). For now, note that this limit could impede the
transitivity conclusion about sha1_git not being an ancestor of
with_data: indicate to retrieve the content's raw data if path resolves
to a content.
"""
def enrich_directory_local(dir, context_url=request_path):
return utils.enrich_directory(dir, context_url)
rev_id, result = service.lookup_directory_through_revision(
revision, path, limit=limit, with_data=with_data)
content = result['content']
if result['type'] == 'dir': # dir_entries
result['content'] = list(map(enrich_directory_local, content))
else: # content
result['content'] = utils.enrich_content(content)
return result
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/log/',
- 'revision-origin-log')
+ 'api-revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/log/',
- 'revision-origin-log')
+ 'api-revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/ts/(?P<ts>.+)/log/',
- 'revision-origin-log')
+ 'api-revision-origin-log')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)'
r'/ts/(?P<ts>.+)/log/',
- 'revision-origin-log')
+ 'api-revision-origin-log')
@api_doc('/revision/origin/log/')
def api_revision_log_by(request, origin_id,
branch_name='refs/heads/master',
ts=None):
"""
.. http:get:: /api/1/revision/origin/(origin_id)[/branch/(branch_name)][/ts/(timestamp)]/log
Show the commit log for a revision, searching for it based on software origin,
branch name, and/or visit timestamp.
This endpoint behaves like :http:get:`/api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/`,
but operates on the revision that has been found at a given software origin,
close to a given point in time, pointed by a given branch.
:param int origin_id: a SWH origin identifier
:param string branch_name: optional parameter specifying a fully-qualified branch name
associated to the software origin, e.g., "refs/heads/master". Defaults to the master branch.
:param string timestamp: optional parameter specifying a timestamp close to which the revision
pointed by the given branch should be looked up. The timestamp can be expressed either
as an ISO date or as a Unix one (in UTC). Defaults to now.
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>jsonarr object author: information about the author of the revision
:>jsonarr string author_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the author of the revision
:>jsonarr object committer: information about the committer of the revision
:>jsonarr string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the committer of the revision
:>jsonarr string committer_date: ISO representation of the commit date (in UTC)
:>jsonarr string date: ISO representation of the revision date (in UTC)
:>jsonarr string directory: the unique identifier that revision points to
:>jsonarr string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
to get information about the directory associated to the revision
:>jsonarr string id: the revision unique identifier
:>jsonarr boolean merge: whether or not the revision corresponds to a merge commit
:>jsonarr string message: the message associated to the revision
:>jsonarr array parents: the parents of the revision, i.e. the previous revisions
that head directly to it, each entry of that array contains an unique parent
revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/`
to get more information about it
:>jsonarr string type: the type of the revision
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: no revision matching the given criteria could be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`revision/origin/723566/ts/2016-01-17T00:00:00+00:00/log/`
""" # noqa
result = {}
per_page = int(request.query_params.get('per_page', '10'))
if ts:
ts = parse_timestamp(ts)
def lookup_revision_log_by_with_limit(o_id, br, ts, limit=per_page+1):
return service.lookup_revision_log_by(o_id, br, ts, limit)
error_msg = 'No revision matching origin %s ' % origin_id
error_msg += ', branch name %s' % branch_name
error_msg += (' and time stamp %s.' % ts) if ts else '.'
rev_get = api_lookup(
lookup_revision_log_by_with_limit, origin_id, branch_name, ts,
notfound_msg=error_msg,
enrich_fn=utils.enrich_revision)
nb_rev = len(rev_get)
if nb_rev == per_page+1:
revisions = rev_get[:-1]
last_sha1_git = rev_get[-1]['id']
params = {k: v for k, v in {'origin_id': origin_id,
'branch_name': branch_name,
'ts': ts,
}.items() if v is not None}
query_params = {}
query_params['sha1_git'] = last_sha1_git
if request.query_params.get('per_page'):
query_params['per_page'] = per_page
result['headers'] = {
- 'link-next': reverse('revision-origin-log', kwargs=params,
+ 'link-next': reverse('api-revision-origin-log', kwargs=params,
query_params=query_params)
}
else:
revisions = rev_get
result.update({'results': revisions})
return result
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/directory/',
- 'revision-directory')
+ 'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/directory/(?P<path>.+)/',
- 'revision-directory')
+ 'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/directory/',
- 'revision-directory')
+ 'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/ts/(?P<ts>.+)/directory/',
- 'revision-directory')
+ 'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/directory/(?P<path>.+)/',
- 'revision-directory')
+ 'api-revision-origin-directory')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/ts/(?P<ts>.+)'
r'/directory/(?P<path>.+)/',
- 'revision-directory')
+ 'api-revision-origin-directory')
@api_doc('/revision/origin/directory/', tags=['hidden'])
def api_directory_through_revision_origin(request, origin_id,
branch_name="refs/heads/master",
ts=None,
path=None,
with_data=False):
"""
Display directory or content information through a revision identified
by origin/branch/timestamp.
"""
if ts:
ts = parse_timestamp(ts)
return _revision_directory_by({'origin_id': origin_id,
'branch_name': branch_name,
'ts': ts
},
path, request.path,
with_data=with_data)
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/',
- 'revision-origin')
+ 'api-revision-origin')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/',
- 'revision-origin')
+ 'api-revision-origin')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)'
r'/branch/(?P<branch_name>.+)/ts/(?P<ts>.+)/',
- 'revision-origin')
+ 'api-revision-origin')
@api_route(r'/revision/origin/(?P<origin_id>[0-9]+)/ts/(?P<ts>.+)/',
- 'revision-origin')
+ 'api-revision-origin')
@api_doc('/revision/origin/')
def api_revision_with_origin(request, origin_id,
branch_name="refs/heads/master",
ts=None):
"""
.. http:get:: /api/1/revision/origin/(origin_id)/[branch/(branch_name)/][ts/(timestamp)/]
Get information about a revision, searching for it based on software origin,
branch name, and/or visit timestamp.
This endpoint behaves like :http:get:`/api/1/revision/(sha1_git)/`,
but operates on the revision that has been found at a given software origin,
close to a given point in time, pointed by a given branch.
:param int origin_id: a SWH origin identifier
:param string branch_name: optional parameter specifying a fully-qualified branch name
associated to the software origin, e.g., "refs/heads/master". Defaults to the master branch.
:param string timestamp: optional parameter specifying a timestamp close to which the revision
pointed by the given branch should be looked up. The timestamp can be expressed either
as an ISO date or as a Unix one (in UTC). Defaults to now.
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json object author: information about the author of the revision
:>json string author_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the author of the revision
:>json object committer: information about the committer of the revision
:>json string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the committer of the revision
:>json string committer_date: ISO representation of the commit date (in UTC)
:>json string date: ISO representation of the revision date (in UTC)
:>json string directory: the unique identifier that revision points to
:>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
to get information about the directory associated to the revision
:>json string id: the revision unique identifier
:>json boolean merge: whether or not the revision corresponds to a merge commit
:>json string message: the message associated to the revision
:>json array parents: the parents of the revision, i.e. the previous revisions
that head directly to it, each entry of that array contains an unique parent
revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/`
to get more information about it
:>json string type: the type of the revision
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 404: no revision matching the given criteria could be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`revision/origin/13706355/branch/refs/heads/2.7/`
""" # noqa
ts = parse_timestamp(ts)
return api_lookup(
service.lookup_revision_by, origin_id, branch_name, ts,
notfound_msg=('Revision with (origin_id: {}, branch_name: {}'
', ts: {}) not found.'.format(origin_id,
branch_name, ts)),
enrich_fn=utils.enrich_revision)
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/prev/(?P<context>[0-9a-f/]+)/',
- 'revision-context')
+ 'api-revision-context')
@api_doc('/revision/prev/', tags=['hidden'])
def api_revision_with_context(request, sha1_git, context):
"""
Return information about revision with id sha1_git.
"""
def _enrich_revision(revision, context=context):
return utils.enrich_revision(revision, context)
return api_lookup(
service.lookup_revision, sha1_git,
notfound_msg='Revision with sha1_git %s not found.' % sha1_git,
enrich_fn=_enrich_revision)
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/', 'revision')
+@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/', 'api-revision')
@api_doc('/revision/')
def api_revision(request, sha1_git):
"""
.. http:get:: /api/1/revision/(sha1_git)/
Get information about a revision in the SWH archive.
Revisions are identified by *sha1* checksums, compatible with Git commit identifiers.
See :func:`swh.model.identifiers.revision_identifier` in our data model module for details
about how they are computed.
:param string sha1_git: hexadecimal representation of the revision *sha1_git* identifier
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json object author: information about the author of the revision
:>json string author_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the author of the revision
:>json object committer: information about the committer of the revision
:>json string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the committer of the revision
:>json string committer_date: ISO representation of the commit date (in UTC)
:>json string date: ISO representation of the revision date (in UTC)
:>json string directory: the unique identifier that revision points to
:>json string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
to get information about the directory associated to the revision
:>json string id: the revision unique identifier
:>json boolean merge: whether or not the revision corresponds to a merge commit
:>json string message: the message associated to the revision
:>json array parents: the parents of the revision, i.e. the previous revisions
that head directly to it, each entry of that array contains an unique parent
revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/`
to get more information about it
:>json string type: the type of the revision
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *sha1_git* value has been provided
:statuscode 404: requested revision can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`revision/aafb16d69fd30ff58afdd69036a26047f3aebdc6/`
""" # noqa
return api_lookup(
service.lookup_revision, sha1_git,
notfound_msg='Revision with sha1_git {} not found.'.format(sha1_git),
enrich_fn=utils.enrich_revision)
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/raw/', 'revision-raw-message')
+@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/raw/',
+ 'api-revision-raw-message')
@api_doc('/revision/raw/', tags=['hidden'], handle_response=True)
def api_revision_raw_message(request, sha1_git):
"""Return the raw data of the message of revision identified by sha1_git
"""
raw = service.lookup_revision_message(sha1_git)
response = HttpResponse(raw['message'],
content_type='application/octet-stream')
response['Content-disposition'] = \
'attachment;filename=rev_%s_raw' % sha1_git
return response
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/directory/',
- 'revision-directory')
+ 'api-revision-directory')
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/directory/(?P<dir_path>.+)/',
- 'revision-directory')
+ 'api-revision-directory')
@api_doc('/revision/directory/')
def api_revision_directory(request, sha1_git,
dir_path=None,
with_data=False):
"""
.. http:get:: /api/1/revision/(sha1_git)/directory/[(path)/]
Get information about directory (entry) objects associated to revisions.
Each revision is associated to a single "root" directory.
This endpoint behaves like :http:get:`/api/1/directory/(sha1_git)/[(path)/]`,
but operates on the root directory associated to a given revision.
:param string sha1_git: hexadecimal representation of the revision *sha1_git* identifier
:param string path: optional parameter to get information about the directory entry
pointed by that relative path
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json array content: directory entries as returned by :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
:>json string path: path of directory from the revision root one
:>json string revision: the unique revision identifier
:>json string type: the type of the directory
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *sha1_git* value has been provided
:statuscode 404: requested revision can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`revision/f1b94134a4b879bc55c3dacdb496690c8ebdc03f/directory/`
""" # noqa
return _revision_directory_by({'sha1_git': sha1_git},
dir_path, request.path,
with_data=with_data)
-@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/log/', 'revision-log')
+@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)/log/', 'api-revision-log')
@api_route(r'/revision/(?P<sha1_git>[0-9a-f]+)'
r'/prev/(?P<prev_sha1s>[0-9a-f/]+)/log/',
- 'revision-log')
+ 'api-revision-log')
@api_doc('/revision/log/')
def api_revision_log(request, sha1_git, prev_sha1s=None):
"""
.. http:get:: /api/1/revision/(sha1_git)[/prev/(prev_sha1s)]/log/
Get a list of all SWH revisions heading to a given one, in other words show the commit log.
:param string sha1_git: hexadecimal representation of the revision *sha1_git* identifier
:param string prev_sha1s: optional parameter representing the navigation breadcrumbs
(descendant revisions previously visited). If multiple values, use / as delimiter.
If provided, revisions information will be added at the beginning of the returned list.
:query int per_page: number of elements in the returned list, for pagination purpose
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:resheader Link: indicates that a subsequent result page is available and contains
the url pointing to it
:>jsonarr object author: information about the author of the revision
:>jsonarr string author_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the author of the revision
:>jsonarr object committer: information about the committer of the revision
:>jsonarr string committer_url: link to :http:get:`/api/1/person/(person_id)/` to get
information about the committer of the revision
:>jsonarr string committer_date: ISO representation of the commit date (in UTC)
:>jsonarr string date: ISO representation of the revision date (in UTC)
:>jsonarr string directory: the unique identifier that revision points to
:>jsonarr string directory_url: link to :http:get:`/api/1/directory/(sha1_git)/[(path)/]`
to get information about the directory associated to the revision
:>jsonarr string id: the revision unique identifier
:>jsonarr boolean merge: whether or not the revision corresponds to a merge commit
:>jsonarr string message: the message associated to the revision
:>jsonarr array parents: the parents of the revision, i.e. the previous revisions
that head directly to it, each entry of that array contains an unique parent
revision identifier but also a link to :http:get:`/api/1/revision/(sha1_git)/`
to get more information about it
:>jsonarr string type: the type of the revision
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid *sha1_git* value has been provided
:statuscode 404: requested revision can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`revision/e1a315fa3fa734e2a6154ed7b5b9ae0eb8987aad/log/`
""" # noqa
result = {}
per_page = int(request.query_params.get('per_page', '10'))
def lookup_revision_log_with_limit(s, limit=per_page+1):
return service.lookup_revision_log(s, limit)
error_msg = 'Revision with sha1_git %s not found.' % sha1_git
rev_get = api_lookup(lookup_revision_log_with_limit, sha1_git,
notfound_msg=error_msg,
enrich_fn=utils.enrich_revision)
nb_rev = len(rev_get)
if nb_rev == per_page+1:
rev_backward = rev_get[:-1]
new_last_sha1 = rev_get[-1]['id']
query_params = {}
if request.query_params.get('per_page'):
query_params['per_page'] = per_page
result['headers'] = {
- 'link-next': reverse('revision-log',
+ 'link-next': reverse('api-revision-log',
kwargs={'sha1_git': new_last_sha1},
query_params=query_params)
}
else:
rev_backward = rev_get
if not prev_sha1s: # no nav breadcrumbs, so we're done
revisions = rev_backward
else:
rev_forward_ids = prev_sha1s.split('/')
rev_forward = api_lookup(
service.lookup_revision_multiple, rev_forward_ids,
notfound_msg=error_msg,
enrich_fn=utils.enrich_revision)
revisions = rev_forward + rev_backward
result.update({
'results': revisions
})
return result
diff --git a/swh/web/api/views/snapshot.py b/swh/web/api/views/snapshot.py
index 4dc2126a3..3deb268ed 100644
--- a/swh/web/api/views/snapshot.py
+++ b/swh/web/api/views/snapshot.py
@@ -1,107 +1,107 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common import service
from swh.web.common.utils import reverse
from swh.web.config import get_config
from swh.web.api.apidoc import api_doc
from swh.web.api import utils
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
-@api_route(r'/snapshot/(?P<snapshot_id>[0-9a-f]+)/', 'snapshot')
+@api_route(r'/snapshot/(?P<snapshot_id>[0-9a-f]+)/', 'api-snapshot')
@api_doc('/snapshot/')
def api_snapshot(request, snapshot_id):
"""
.. http:get:: /api/1/snapshot/(snapshot_id)/
Get information about a snapshot in the SWH archive.
A snapshot is a set of named branches, which are pointers to objects at any
level of the Software Heritage DAG. It represents a full picture of an
origin at a given time.
As well as pointing to other objects in the Software Heritage DAG, branches
can also be aliases, in which case their target is the name of another
branch in the same snapshot, or dangling, in which case the target is
unknown.
A snapshot identifier is a salted sha1. See :func:`swh.model.identifiers.snapshot_identifier`
in our data model module for details about how they are computed.
:param sha1 snapshot_id: a SWH snapshot identifier
:query str branches_from: optional parameter used to skip branches
whose name is lesser than it before returning them
:query int branches_count: optional parameter used to restrain
the amount of returned branches (default to 1000)
:query str target_types: optional comma separated list parameter
used to filter the target types of branch to return (possible values
that can be contained in that list are `'content', 'directory',
'revision', 'release', 'snapshot', 'alias'`)
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:resheader Link: indicates that a subsequent result page is available and contains
the url pointing to it
:>json object branches: object containing all branches associated to the snapshot,
for each of them the associated SWH target type and id are given but also
a link to get information about that target
:>json string id: the unique identifier of the snapshot
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid snapshot identifier has been provided
:statuscode 404: requested snapshot can not be found in the SWH archive
**Example:**
.. parsed-literal::
:swh_web_api:`snapshot/6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a/`
""" # noqa
def _enrich_snapshot(snapshot):
s = snapshot.copy()
if 'branches' in s:
s['branches'] = {
k: utils.enrich_object(v) if v else None
for k, v in s['branches'].items()
}
return s
snapshot_content_max_size = get_config()['snapshot_content_max_size']
branches_from = request.GET.get('branches_from', '')
branches_count = int(request.GET.get('branches_count',
snapshot_content_max_size))
target_types = request.GET.get('target_types', None)
target_types = target_types.split(',') if target_types else None
results = api_lookup(
service.lookup_snapshot, snapshot_id, branches_from,
branches_count+1, target_types,
notfound_msg='Snapshot with id {} not found.'.format(snapshot_id),
enrich_fn=_enrich_snapshot)
next_branch = None
if len(results['branches']) > branches_count:
next_branch = sorted(results['branches'].keys())[-1]
del results['branches'][next_branch]
response = {'results': results, 'headers': {}}
if next_branch:
response['headers']['link-next'] = \
- reverse('snapshot',
+ reverse('api-snapshot',
kwargs={'snapshot_id': snapshot_id},
query_params={'branches_from': next_branch,
'branches_count': branches_count,
'target_types': target_types})
return response
diff --git a/swh/web/api/views/stat.py b/swh/web/api/views/stat.py
index 0ad7362b6..019d1d3a4 100644
--- a/swh/web/api/views/stat.py
+++ b/swh/web/api/views/stat.py
@@ -1,53 +1,53 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.common import service
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
-@api_route(r'/stat/counters/', 'stat-counters')
+@api_route(r'/stat/counters/', 'api-stat-counters')
@api_doc('/stat/counters/', noargs=True)
def api_stats(request):
"""
.. http:get:: /api/1/stat/counters/
Get statistics about the content of the archive.
:>json number content: current number of content objects (aka files) in the SWH archive
:>json number directory: current number of directory objects in the SWH archive
:>json number directory_entry_dir: current number of SWH directory entries
pointing to others SWH directories in the SWH archive
:>json number directory_entry_file: current number of SWH directory entries
pointing to SWH content objects in the SWH archive
:>json number directory_entry_rev: current number of SWH directory entries
pointing to SWH revision objects (e.g. git submodules) in the SWH archive
:>json number entity: current number of SWH entities (a SWH entity is either
a *group_of_entities*, a *group_of_persons*, a *project*, a *person*, an *organization*,
or a *hosting* service) in the SWH archive
:>json number origin: current number of SWH origins (an origin is a "place" where code
source can be found, e.g. a git repository, a tarball, ...) in the SWH archive
:>json number person: current number of SWH persons (code source authors or committers)
in the SWH archive
:>json number release: current number of SWH releases objects in the SWH archive
:>json number revision: current number of SWH revision objects (aka commits) in the SWH archive
:>json number skipped_content: current number of content objects (aka files) which where
not inserted in the SWH archive
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
**Example:**
.. parsed-literal::
:swh_web_api:`stat/counters/`
""" # noqa
return service.stat_counters()
diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py
index 393525e97..3d8e1374e 100644
--- a/swh/web/api/views/utils.py
+++ b/swh/web/api/views/utils.py
@@ -1,73 +1,73 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework.response import Response
from rest_framework.decorators import api_view
from types import GeneratorType
from swh.web.common.exc import NotFoundExc
from swh.web.api.apiurls import APIUrls, api_route
def api_lookup(lookup_fn, *args,
notfound_msg='Object not found',
enrich_fn=None):
"""
Capture a redundant behavior of:
- looking up the backend with a criteria (be it an identifier or
checksum) passed to the function lookup_fn
- if nothing is found, raise an NotFoundExc exception with error
message notfound_msg.
- Otherwise if something is returned:
- either as list, map or generator, map the enrich_fn function to
it and return the resulting data structure as list.
- either as dict and pass to enrich_fn and return the dict
enriched.
Args:
- lookup_fn: function expects one criteria and optional supplementary
\*args.
- notfound_msg: if nothing matching the criteria is found,
raise NotFoundExc with this error message.
- enrich_fn: Function to use to enrich the result returned by
lookup_fn. Default to the identity function if not provided.
- \*args: supplementary arguments to pass to lookup_fn.
Raises:
NotFoundExp or whatever `lookup_fn` raises.
""" # noqa
if enrich_fn is None:
enrich_fn = (lambda x: x)
res = lookup_fn(*args)
if not res:
raise NotFoundExc(notfound_msg)
if isinstance(res, (map, list, GeneratorType)):
return [enrich_fn(x) for x in res]
return enrich_fn(res)
@api_view(['GET', 'HEAD'])
def api_home(request):
return Response({}, template_name='api/api.html')
APIUrls.add_url_pattern(r'^$', api_home, view_name='api-homepage')
-@api_route(r'/', 'endpoints')
+@api_route(r'/', 'api-endpoints')
def api_endpoints(request):
"""Display the list of opened api endpoints.
"""
routes = APIUrls.get_app_endpoints().copy()
for route, doc in routes.items():
doc['doc_intro'] = doc['docstring'].split('\n\n')[0]
# Return a list of routes with consistent ordering
env = {
'doc_routes': sorted(routes.items())
}
return Response(env, template_name="api/endpoints.html")
diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py
index 544cbebb5..aa214204e 100644
--- a/swh/web/api/views/vault.py
+++ b/swh/web/api/views/vault.py
@@ -1,203 +1,203 @@
# Copyright (C) 2015-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.http import HttpResponse
from django.views.decorators.cache import never_cache
from swh.model import hashutil
from swh.web.common import service, query
from swh.web.common.utils import reverse
from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import api_route
from swh.web.api.views.utils import api_lookup
# XXX: a bit spaghetti. Would be better with class-based views.
def _dispatch_cook_progress(request, obj_type, obj_id):
    """Dispatch a vault cooking request on the HTTP method:
    GET checks the cooking progress for obj_id, POST (re)queues a
    cooking task for it.

    Args:
        request: incoming HTTP request (GET or POST)
        obj_type: vault bundle type, e.g. 'directory' or
            'revision_gitfast'
        obj_id: binary object identifier (hex-encoded for messages)
    """
    hex_id = hashutil.hash_to_hex(obj_id)
    # 'revision_gitfast' -> 'Revision', 'directory' -> 'Directory':
    # human-readable object name for the error messages below
    object_name = obj_type.split('_')[0].title()
    if request.method == 'GET':
        return api_lookup(
            service.vault_progress, obj_type, obj_id,
            notfound_msg=("{} '{}' was never requested."
                          .format(object_name, hex_id)))
    elif request.method == 'POST':
        # e-mail to notify when cooking is done; may come either from
        # the POST body or from the query string
        email = request.POST.get('email', request.GET.get('email', None))
        return api_lookup(
            service.vault_cook, obj_type, obj_id, email,
            notfound_msg=("{} '{}' not found."
                          .format(object_name, hex_id)))
@api_route(r'/vault/directory/(?P[a-fA-F0-9]+)/',
- 'vault-cook-directory', methods=['GET', 'POST'],
+ 'api-vault-cook-directory', methods=['GET', 'POST'],
throttle_scope='swh_vault_cooking')
@never_cache
@api_doc('/vault/directory/', tags=['hidden'])
def api_vault_cook_directory(request, dir_id):
"""
.. http:get:: /api/1/vault/directory/(dir_id)/
.. http:post:: /api/1/vault/directory/(dir_id)/
Request the cooking of an archive for a directory or check
its cooking status.
That endpoint enables to create a vault cooking task for a directory
through a POST request or check the status of a previously created one
through a GET request.
To import the directory in the current directory, use::
$ tar xvf path/to/directory.tar.gz
:param string dir_id: the directory's sha1 identifier
:query string email: e-mail to notify when the archive is ready
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json string fetch_url: the url from which to download the archive once it has been cooked
(see :http:get:`/api/1/vault/directory/(dir_id)/raw/`)
:>json string obj_type: the type of object to cook (directory or revision)
:>json string progress_message: message describing the cooking task progress
:>json number id: the cooking task id
:>json string status: the cooking task status (new/pending/done/failed)
:>json string obj_id: the identifier of the object to cook
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid directory identifier has been provided
:statuscode 404: requested directory can not be found in the SWH archive
""" # noqa
_, obj_id = query.parse_hash_with_algorithms_or_throws(
dir_id, ['sha1'], 'Only sha1_git is supported.')
res = _dispatch_cook_progress(request, 'directory', obj_id)
- res['fetch_url'] = reverse('vault-fetch-directory',
+ res['fetch_url'] = reverse('api-vault-fetch-directory',
kwargs={'dir_id': dir_id})
return res
@api_route(r'/vault/directory/(?P[a-fA-F0-9]+)/raw/',
- 'vault-fetch-directory')
+ 'api-vault-fetch-directory')
@api_doc('/vault/directory/raw/', tags=['hidden'], handle_response=True)
def api_vault_fetch_directory(request, dir_id):
    """
    .. http:get:: /api/1/vault/directory/(dir_id)/raw/
    Fetch the cooked archive for a directory.
    See :http:get:`/api/1/vault/directory/(dir_id)/` to get more
    details on directory cooking.
    :param string dir_id: the directory's sha1 identifier
    :resheader Content-Type: application/octet-stream
    **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
    :statuscode 200: no error
    :statuscode 400: an invalid directory identifier has been provided
    :statuscode 404: requested directory can not be found in the SWH archive
    """ # noqa
    # validate dir_id as a sha1 hex string (the helper raises on
    # malformed input, per its name)
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        dir_id, ['sha1'], 'Only sha1_git is supported.')
    res = api_lookup(
        service.vault_fetch, 'directory', obj_id,
        notfound_msg="Directory with ID '{}' not found.".format(dir_id))
    # serve the cooked bytes as a gzip download attachment named after
    # the directory identifier
    fname = '{}.tar.gz'.format(dir_id)
    response = HttpResponse(res, content_type='application/gzip')
    response['Content-disposition'] = 'attachment; filename={}'.format(fname)
    return response
@api_route(r'/vault/revision/(?P[a-fA-F0-9]+)/gitfast/',
- 'vault-cook-revision_gitfast', methods=['GET', 'POST'],
+ 'api-vault-cook-revision_gitfast', methods=['GET', 'POST'],
throttle_scope='swh_vault_cooking')
@never_cache
@api_doc('/vault/revision/gitfast/', tags=['hidden'])
def api_vault_cook_revision_gitfast(request, rev_id):
"""
.. http:get:: /api/1/vault/revision/(rev_id)/gitfast/
.. http:post:: /api/1/vault/revision/(rev_id)/gitfast/
Request the cooking of a gitfast archive for a revision or check
its cooking status.
That endpoint enables to create a vault cooking task for a revision
through a POST request or check the status of a previously created one
through a GET request.
To import the revision in the current directory, use::
$ git init
$ zcat path/to/revision.gitfast.gz | git fast-import
$ git checkout HEAD
:param string rev_id: the revision's sha1 identifier
:query string email: e-mail to notify when the gitfast archive is ready
:reqheader Accept: the requested response content type,
either *application/json* (default) or *application/yaml*
:resheader Content-Type: this depends on :http:header:`Accept` header of request
:>json string fetch_url: the url from which to download the archive once it has been cooked
(see :http:get:`/api/1/vault/revision/(rev_id)/gitfast/raw/`)
:>json string obj_type: the type of object to cook (directory or revision)
:>json string progress_message: message describing the cooking task progress
:>json number id: the cooking task id
:>json string status: the cooking task status (new/pending/done/failed)
:>json string obj_id: the identifier of the object to cook
**Allowed HTTP Methods:** :http:method:`get`, :http:method:`post`, :http:method:`head`, :http:method:`options`
:statuscode 200: no error
:statuscode 400: an invalid revision identifier has been provided
:statuscode 404: requested revision can not be found in the SWH archive
""" # noqa
_, obj_id = query.parse_hash_with_algorithms_or_throws(
rev_id, ['sha1'], 'Only sha1_git is supported.')
res = _dispatch_cook_progress(request, 'revision_gitfast', obj_id)
- res['fetch_url'] = reverse('vault-fetch-revision_gitfast',
+ res['fetch_url'] = reverse('api-vault-fetch-revision_gitfast',
kwargs={'rev_id': rev_id})
return res
@api_route(r'/vault/revision/(?P[a-fA-F0-9]+)/gitfast/raw/',
- 'vault-fetch-revision_gitfast')
+ 'api-vault-fetch-revision_gitfast')
@api_doc('/vault/revision/gitfast/raw/', tags=['hidden'], handle_response=True)
def api_vault_fetch_revision_gitfast(request, rev_id):
    """
    .. http:get:: /api/1/vault/revision/(rev_id)/gitfast/raw/
    Fetch the cooked gitfast archive for a revision.
    See :http:get:`/api/1/vault/revision/(rev_id)/gitfast/` to get more
    details on revision cooking.
    :param string rev_id: the revision's sha1 identifier
    :resheader Content-Type: application/octet-stream
    **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
    :statuscode 200: no error
    :statuscode 400: an invalid revision identifier has been provided
    :statuscode 404: requested revision can not be found in the SWH archive
    """ # noqa
    # validate rev_id as a sha1 hex string (the helper raises on
    # malformed input, per its name)
    _, obj_id = query.parse_hash_with_algorithms_or_throws(
        rev_id, ['sha1'], 'Only sha1_git is supported.')
    res = api_lookup(
        service.vault_fetch, 'revision_gitfast', obj_id,
        notfound_msg="Revision with ID '{}' not found.".format(rev_id))
    # serve the cooked gitfast stream as a gzip download attachment
    # named after the revision identifier
    fname = '{}.gitfast.gz'.format(rev_id)
    response = HttpResponse(res, content_type='application/gzip')
    response['Content-disposition'] = 'attachment; filename={}'.format(fname)
    return response
diff --git a/swh/web/assets/src/bundles/browse/origin-search.js b/swh/web/assets/src/bundles/browse/origin-search.js
index 3ef805c83..6e6622801 100644
--- a/swh/web/assets/src/bundles/browse/origin-search.js
+++ b/swh/web/assets/src/bundles/browse/origin-search.js
@@ -1,248 +1,248 @@
/**
* Copyright (C) 2018 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import {heapsPermute} from 'utils/heaps-permute';
import {handleFetchError} from 'utils/functions';
let originPatterns;
let perPage = 100;
let limit = perPage * 2;
let offset = 0;
let currentData = null;
let inSearch = false;
function fixTableRowsStyle() {
setTimeout(() => {
$('#origin-search-results tbody tr').removeAttr('style');
});
}
function clearOriginSearchResultsTable() {
$('#origin-search-results tbody tr').remove();
}
function populateOriginSearchResultsTable(data, offset) {
let localOffset = offset % limit;
if (data.length > 0) {
$('#swh-origin-search-results').show();
$('#swh-no-result').hide();
clearOriginSearchResultsTable();
let table = $('#origin-search-results tbody');
for (let i = localOffset; i < localOffset + perPage && i < data.length; ++i) {
let elem = data[i];
let tableRow = '
';
tableRow += '
' + elem.type + '
';
let browseUrl = Urls.browse_origin(elem.url);
tableRow += '
';
table.append(tableRow);
// get async latest visit snapshot and update visit status icon
let latestSnapshotUrl = Urls.browse_origin_latest_snapshot(elem.id);
fetch(latestSnapshotUrl)
.then(response => response.json())
.then(data => {
let originId = elem.id;
$('#visit-status-origin-' + originId).children().remove();
if (data) {
$('#visit-status-origin-' + originId).append('');
} else {
$('#visit-status-origin-' + originId).append('');
}
});
}
fixTableRowsStyle();
} else {
$('#swh-origin-search-results').hide();
$('#swh-no-result').text('No origins matching the search criteria were found.');
$('#swh-no-result').show();
}
if (data.length - localOffset < perPage ||
(data.length < limit && (localOffset + perPage) === data.length)) {
$('#origins-next-results-button').addClass('disabled');
} else {
$('#origins-next-results-button').removeClass('disabled');
}
if (offset > 0) {
$('#origins-prev-results-button').removeClass('disabled');
} else {
$('#origins-prev-results-button').addClass('disabled');
}
inSearch = false;
if (typeof Storage !== 'undefined') {
sessionStorage.setItem('last-swh-origin-search-offset', offset);
}
setTimeout(() => {
window.scrollTo(0, 0);
});
}
function escapeStringRegexp(str) {
  // Escape regex metacharacters in str before it is embedded in the
  // search pattern sent to the backend (see searchOrigins).
  // NOTE(review): the replacement is '\\\\\\$&' (doubled backslash before
  // the matched char) rather than the usual '\\$&' — presumably because
  // the pattern is re-interpreted by the server-side regex engine after
  // URL transport; confirm before changing.
  let matchOperatorsRe = /[|\\{}()[\]^$+*?.]/g;
  return str.replace(matchOperatorsRe, '\\\\\\$&');
}
function searchOrigins(patterns, limit, searchOffset, offset) {
originPatterns = patterns;
let patternsArray = patterns.trim().replace(/\s+/g, ' ').split(' ');
for (let i = 0; i < patternsArray.length; ++i) {
patternsArray[i] = escapeStringRegexp(patternsArray[i]);
}
let patternsPermut = [];
heapsPermute(patternsArray, p => patternsPermut.push(p.join('.*')));
let regex = patternsPermut.join('|');
let withVisit = $('#swh-search-origins-with-visit').prop('checked');
let searchUrl = Urls.browse_origin_search(regex) + `?limit=${limit}&offset=${searchOffset}®exp=true&with_visit=${withVisit}`;
clearOriginSearchResultsTable();
$('.swh-loading').addClass('show');
fetch(searchUrl)
.then(handleFetchError)
.then(response => response.json())
.then(data => {
currentData = data;
if (typeof Storage !== 'undefined') {
sessionStorage.setItem('last-swh-origin-url-patterns', patterns);
sessionStorage.setItem('last-swh-origin-search-results', JSON.stringify(data));
sessionStorage.setItem('last-swh-origin-search-offset', offset);
}
$('.swh-loading').removeClass('show');
populateOriginSearchResultsTable(data, offset);
})
.catch(response => {
$('.swh-loading').removeClass('show');
inSearch = false;
$('#swh-origin-search-results').hide();
$('#swh-no-result').text(`Error ${response.status}: ${response.statusText}`);
$('#swh-no-result').show();
});
}
function doSearch() {
$('#swh-no-result').hide();
let patterns = $('#origins-url-patterns').val();
offset = 0;
inSearch = true;
// first try to resolve a swh persistent identifier
- let resolvePidUrl = Urls.resolve_swh_pid(patterns);
+ let resolvePidUrl = Urls.api_resolve_swh_pid(patterns);
fetch(resolvePidUrl)
.then(handleFetchError)
.then(response => response.json())
.then(data => {
// pid has been successfully resolved,
// so redirect to browse page
window.location = data.browse_url;
})
.catch(response => {
// pid resolving failed
if (patterns.startsWith('swh:')) {
// display a useful error message if the input
// looks like a swh pid
response.json().then(data => {
$('#swh-origin-search-results').hide();
$('.swh-search-pagination').hide();
$('#swh-no-result').text(data.reason);
$('#swh-no-result').show();
});
} else {
// otherwise, proceed with origins search
$('#swh-origin-search-results').show();
$('.swh-search-pagination').show();
searchOrigins(patterns, limit, offset, offset);
}
});
}
export function initOriginSearch() {
$(document).ready(() => {
if (typeof Storage !== 'undefined') {
originPatterns = sessionStorage.getItem('last-swh-origin-url-patterns');
let data = sessionStorage.getItem('last-swh-origin-search-results');
offset = sessionStorage.getItem('last-swh-origin-search-offset');
if (data) {
$('#origins-url-patterns').val(originPatterns);
offset = parseInt(offset);
currentData = JSON.parse(data);
populateOriginSearchResultsTable(currentData, offset);
}
let withVisit = sessionStorage.getItem('last-swh-origin-with-visit');
if (withVisit !== null) {
$('#swh-search-origins-with-visit').prop('checked', JSON.parse(withVisit));
}
}
$('#swh-search-origins').submit(event => {
event.preventDefault();
let patterns = $('#origins-url-patterns').val().trim();
if (typeof Storage !== 'undefined') {
sessionStorage.setItem('last-swh-origin-url-patterns', patterns);
sessionStorage.setItem('last-swh-origin-search-results', '');
sessionStorage.setItem('last-swh-origin-search-offset', '');
}
let withVisit = $('#swh-search-origins-with-visit').prop('checked');
let queryParameters = '?q=' + encodeURIComponent(patterns);
if (withVisit) {
queryParameters += '&with_visit';
}
// Update the url, triggering page reload and effective search
window.location.search = queryParameters;
});
$('#origins-next-results-button').click(event => {
if ($('#origins-next-results-button').hasClass('disabled') || inSearch) {
return;
}
inSearch = true;
offset += perPage;
if (!currentData || (offset >= limit && offset % limit === 0)) {
searchOrigins(originPatterns, limit, offset, offset);
} else {
populateOriginSearchResultsTable(currentData, offset);
}
event.preventDefault();
});
$('#origins-prev-results-button').click(event => {
if ($('#origins-prev-results-button').hasClass('disabled') || inSearch) {
return;
}
inSearch = true;
offset -= perPage;
if (!currentData || (offset > 0 && (offset + perPage) % limit === 0)) {
searchOrigins(originPatterns, limit, (offset + perPage) - limit, offset);
} else {
populateOriginSearchResultsTable(currentData, offset);
}
event.preventDefault();
});
$(document).on('shown.bs.tab', 'a[data-toggle="tab"]', e => {
if (e.currentTarget.text.trim() === 'Search') {
fixTableRowsStyle();
}
});
$(window).on('unload', () => {
if (typeof Storage !== 'undefined') {
sessionStorage.setItem('last-swh-origin-with-visit',
JSON.stringify($('#swh-search-origins-with-visit').prop('checked')));
}
});
let urlParams = new URLSearchParams(window.location.search);
let query = urlParams.get('q');
let withVisit = urlParams.has('with_visit');
let data = sessionStorage.getItem('last-swh-origin-search-results');
if (query && !data) {
$('#origins-url-patterns').val(query);
if (withVisit) {
$('#swh-search-origins-with-visit').prop('checked', true);
}
doSearch();
}
});
}
diff --git a/swh/web/assets/src/bundles/vault/vault-create-tasks.js b/swh/web/assets/src/bundles/vault/vault-create-tasks.js
index fc186f6a0..fd534ea9d 100644
--- a/swh/web/assets/src/bundles/vault/vault-create-tasks.js
+++ b/swh/web/assets/src/bundles/vault/vault-create-tasks.js
@@ -1,90 +1,90 @@
/**
* Copyright (C) 2018 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import {handleFetchError, csrfPost} from 'utils/functions';
function addVaultCookingTask(cookingTask) {
let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks'));
if (!vaultCookingTasks) {
vaultCookingTasks = [];
}
if (vaultCookingTasks.find(val => {
return val.object_type === cookingTask.object_type &&
val.object_id === cookingTask.object_id;
}) === undefined) {
let cookingUrl;
if (cookingTask.object_type === 'directory') {
- cookingUrl = Urls.vault_cook_directory(cookingTask.object_id);
+ cookingUrl = Urls.api_vault_cook_directory(cookingTask.object_id);
} else {
- cookingUrl = Urls.vault_cook_revision_gitfast(cookingTask.object_id);
+ cookingUrl = Urls.api_vault_cook_revision_gitfast(cookingTask.object_id);
}
if (cookingTask.email) {
cookingUrl += '?email=' + cookingTask.email;
}
csrfPost(cookingUrl)
.then(handleFetchError)
.then(() => {
vaultCookingTasks.push(cookingTask);
localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks));
$('#vault-cook-directory-modal').modal('hide');
$('#vault-cook-revision-modal').modal('hide');
window.location = Urls.browse_vault();
})
.catch(() => {
$('#vault-cook-directory-modal').modal('hide');
$('#vault-cook-revision-modal').modal('hide');
});
} else {
window.location = Urls.browse_vault();
}
}
function validateEmail(email) {
  // Client-side sanity check of the vault notification e-mail address.
  // Practical e-mail regex: a quoted or dot-separated local part, then
  // either a bracketed IPv4 address or dot-separated domain labels
  // ending in a TLD of 2+ letters. Comparison is case-insensitive.
  let re = /^(([^<>()[\]\\.,;:\s@"]+(\.[^<>()[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/;
  return re.test(String(email).toLowerCase());
}
export function cookDirectoryArchive(directoryId) {
let email = $('#swh-vault-directory-email').val().trim();
if (!email || validateEmail(email)) {
let cookingTask = {'object_type': 'directory',
'object_id': directoryId,
'email': email,
'status': 'new'};
addVaultCookingTask(cookingTask);
} else {
$('#invalid-email-modal').modal('show');
}
}
export function cookRevisionArchive(revisionId) {
let email = $('#swh-vault-revision-email').val().trim();
if (!email || validateEmail(email)) {
let cookingTask = {
'object_type': 'revision',
'object_id': revisionId,
'email': email,
'status': 'new'
};
addVaultCookingTask(cookingTask);
} else {
$('#invalid-email-modal').modal('show');
}
}
export function initTaskCreationUi() {
// reparent the modals to the top navigation div in order to be able
// to display them
$(document).ready(function() {
$('.swh-browse-top-navigation').append($('#vault-cook-directory-modal'));
$('.swh-browse-top-navigation').append($('#vault-cook-revision-modal'));
$('.swh-browse-top-navigation').append($('#invalid-email-modal'));
});
}
diff --git a/swh/web/assets/src/bundles/vault/vault-ui.js b/swh/web/assets/src/bundles/vault/vault-ui.js
index 384dd6abc..59377a084 100644
--- a/swh/web/assets/src/bundles/vault/vault-ui.js
+++ b/swh/web/assets/src/bundles/vault/vault-ui.js
@@ -1,256 +1,256 @@
/**
* Copyright (C) 2018 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU Affero General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
import {handleFetchError, handleFetchErrors, csrfPost} from 'utils/functions';
let progress = `
;`;
let pollingInterval = 5000;
let checkVaultId;
function updateProgressBar(progressBar, cookingTask) {
  // Color the vault progress bar according to the task status:
  // grey = new, blue = pending, green = done, red = failed.
  if (cookingTask.status === 'new') {
    progressBar.css('background-color', 'rgba(128, 128, 128, 0.5)');
  } else if (cookingTask.status === 'pending') {
    progressBar.css('background-color', 'rgba(0, 0, 255, 0.5)');
  } else if (cookingTask.status === 'done') {
    progressBar.css('background-color', '#5cb85c');
  } else if (cookingTask.status === 'failed') {
    progressBar.css('background-color', 'rgba(255, 0, 0, 0.5)');
    // drop the striped gradient so the failure color reads solid red
    progressBar.css('background-image', 'none');
  }
  // Show the backend progress message when available, else the raw status.
  progressBar.text(cookingTask.progress_message || cookingTask.status);
  // Keep the bar animated while the task is still in flight.
  if (cookingTask.status === 'new' || cookingTask.status === 'pending') {
    progressBar.addClass('progress-bar-animated');
  } else {
    progressBar.removeClass('progress-bar-striped');
  }
}
let recookTask;
// called when the user wants to download a cooked archive
export function fetchCookedObject(fetchUrl) {
recookTask = null;
// first, check if the link is still available from the vault
fetch(fetchUrl)
.then(response => {
// link is still alive, proceed to download
if (response.ok) {
$('#vault-fetch-iframe').attr('src', fetchUrl);
// link is dead
} else {
// get the associated cooking task
let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks'));
for (let i = 0; i < vaultCookingTasks.length; ++i) {
if (vaultCookingTasks[i].fetch_url === fetchUrl) {
recookTask = vaultCookingTasks[i];
break;
}
}
// display a modal asking the user if he wants to recook the archive
$('#vault-recook-object-modal').modal('show');
}
});
}
// called when the user wants to recook an archive
// for which the download link is not available anymore
export function recookObject() {
if (recookTask) {
// stop cooking tasks status polling
clearTimeout(checkVaultId);
// build cook request url
let cookingUrl;
if (recookTask.object_type === 'directory') {
- cookingUrl = Urls.vault_cook_directory(recookTask.object_id);
+ cookingUrl = Urls.api_vault_cook_directory(recookTask.object_id);
} else {
- cookingUrl = Urls.vault_cook_revision_gitfast(recookTask.object_id);
+ cookingUrl = Urls.api_vault_cook_revision_gitfast(recookTask.object_id);
}
if (recookTask.email) {
cookingUrl += '?email=' + recookTask.email;
}
// request archive cooking
csrfPost(cookingUrl)
.then(handleFetchError)
.then(() => {
// update task status
recookTask.status = 'new';
let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks'));
for (let i = 0; i < vaultCookingTasks.length; ++i) {
if (vaultCookingTasks[i].object_id === recookTask.object_id) {
vaultCookingTasks[i] = recookTask;
break;
}
}
// save updated tasks to local storage
localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks));
// restart cooking tasks status polling
checkVaultCookingTasks();
// hide recook archive modal
$('#vault-recook-object-modal').modal('hide');
})
// something went wrong
.catch(() => {
checkVaultCookingTasks();
$('#vault-recook-object-modal').modal('hide');
});
}
}
function checkVaultCookingTasks() {
let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks'));
if (!vaultCookingTasks || vaultCookingTasks.length === 0) {
$('.swh-vault-table tbody tr').remove();
checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval);
return;
}
let cookingTaskRequests = [];
let tasks = {};
let currentObjectIds = [];
for (let i = 0; i < vaultCookingTasks.length; ++i) {
let cookingTask = vaultCookingTasks[i];
currentObjectIds.push(cookingTask.object_id);
tasks[cookingTask.object_id] = cookingTask;
let cookingUrl;
if (cookingTask.object_type === 'directory') {
- cookingUrl = Urls.vault_cook_directory(cookingTask.object_id);
+ cookingUrl = Urls.api_vault_cook_directory(cookingTask.object_id);
} else {
- cookingUrl = Urls.vault_cook_revision_gitfast(cookingTask.object_id);
+ cookingUrl = Urls.api_vault_cook_revision_gitfast(cookingTask.object_id);
}
if (cookingTask.status !== 'done' && cookingTask.status !== 'failed') {
cookingTaskRequests.push(fetch(cookingUrl));
}
}
$('.swh-vault-table tbody tr').each((i, row) => {
let objectId = $(row).find('.vault-object-id').data('object-id');
if ($.inArray(objectId, currentObjectIds) === -1) {
$(row).remove();
}
});
Promise.all(cookingTaskRequests)
.then(handleFetchErrors)
.then(responses => Promise.all(responses.map(r => r.json())))
.then(cookingTasks => {
let table = $('#vault-cooking-tasks tbody');
for (let i = 0; i < cookingTasks.length; ++i) {
let cookingTask = tasks[cookingTasks[i].obj_id];
cookingTask.status = cookingTasks[i].status;
cookingTask.fetch_url = cookingTasks[i].fetch_url;
cookingTask.progress_message = cookingTasks[i].progress_message;
}
for (let i = 0; i < vaultCookingTasks.length; ++i) {
let cookingTask = vaultCookingTasks[i];
let rowTask = $('#vault-task-' + cookingTask.object_id);
let downloadLinkWait = 'Waiting for download link to be available';
if (!rowTask.length) {
let browseUrl;
if (cookingTask.object_type === 'directory') {
browseUrl = Urls.browse_directory(cookingTask.object_id);
} else {
browseUrl = Urls.browse_revision(cookingTask.object_id);
}
let progressBar = $.parseHTML(progress)[0];
let progressBarContent = $(progressBar).find('.progress-bar');
updateProgressBar(progressBarContent, cookingTask);
let tableRow;
if (cookingTask.object_type === 'directory') {
tableRow = `
`;
} else {
tableRow = `
`;
}
tableRow += '
';
if (cookingTask.object_type === 'directory') {
tableRow += '
`;
let downloadLink = downloadLinkWait;
if (cookingTask.status === 'done') {
downloadLink = `';
} else if (cookingTask.status === 'failed') {
downloadLink = '';
}
tableRow += `
${downloadLink}
`;
tableRow += '
';
table.prepend(tableRow);
} else {
let progressBar = rowTask.find('.progress-bar');
updateProgressBar(progressBar, cookingTask);
let downloadLink = rowTask.find('.vault-dl-link');
if (cookingTask.status === 'done') {
downloadLink[0].innerHTML = `';
} else if (cookingTask.status === 'failed') {
downloadLink[0].innerHTML = '';
} else if (cookingTask.status === 'new') {
downloadLink[0].innerHTML = downloadLinkWait;
}
}
}
localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks));
checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval);
})
.catch(() => {});
}
export function initUi() {
$('#vault-tasks-toggle-selection').change(event => {
$('.vault-task-toggle-selection').prop('checked', event.currentTarget.checked);
});
$('#vault-remove-tasks').click(() => {
clearTimeout(checkVaultId);
let tasksToRemove = [];
$('.swh-vault-table tbody tr').each((i, row) => {
let taskSelected = $(row).find('.vault-task-toggle-selection').prop('checked');
if (taskSelected) {
let objectId = $(row).find('.vault-object-id').data('object-id');
tasksToRemove.push(objectId);
$(row).remove();
}
});
let vaultCookingTasks = JSON.parse(localStorage.getItem('swh-vault-cooking-tasks'));
vaultCookingTasks = $.grep(vaultCookingTasks, task => {
return $.inArray(task.object_id, tasksToRemove) === -1;
});
localStorage.setItem('swh-vault-cooking-tasks', JSON.stringify(vaultCookingTasks));
$('#vault-tasks-toggle-selection').prop('checked', false);
checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval);
});
checkVaultId = setTimeout(checkVaultCookingTasks, pollingInterval);
$(document).on('shown.bs.tab', 'a[data-toggle="tab"]', e => {
if (e.currentTarget.text.trim() === 'Vault') {
clearTimeout(checkVaultId);
checkVaultCookingTasks();
}
});
window.onfocus = () => {
clearTimeout(checkVaultId);
checkVaultCookingTasks();
};
}
diff --git a/swh/web/templates/homepage.html b/swh/web/templates/homepage.html
index 5322964d1..ef0cb0a80 100644
--- a/swh/web/templates/homepage.html
+++ b/swh/web/templates/homepage.html
@@ -1,119 +1,119 @@
{% extends "layout.html" %}
{% comment %}
Copyright (C) 2017-2018 The Software Heritage developers
See the AUTHORS file at the top-level directory of this distribution
License: GNU Affero General Public License version 3, or any later version
See top-level LICENSE file for more information
{% endcomment %}
{% load static %}
{% block title %}Welcome to the Software Heritage archive{% endblock %}
{% block navbar-content %}
Welcome to the Software Heritage archive
{% endblock %}
{% block content %}
Overview
The long term goal of the Software Heritage initiative is to collect
all publicly available software in source code form together with its
development history, replicate it massively to ensure its preservation,
and share it with everyone who needs it.
The Software Heritage archive is growing over time as we crawl new source code from software
projects and development forges. We will incrementally release archive search
and browse functionalities — as of now you can check whether source code you care
about is already present in the archive or not.
Content
A significant amount of source code has already been ingested in the Software Heritage
archive. It currently includes:
Size
As of today the archive already contains and keeps safe for you the following amount
of objects: